# Code snippets
# 2-7-25 from ChatGPT
import os
import click
from pdfminer.high_level import extract_text
from markdownify import markdownify as md
def pdf_to_markdown(pdf_path, output_dir):
    """Convert a PDF file to Markdown and save it.

    The PDF's text is extracted with ``extract_text``, passed through
    ``md`` (markdownify), and written as UTF-8 into ``output_dir`` under the
    PDF's base name with its final extension replaced by ``.md``. A
    confirmation line is echoed once the file has been written.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_dir: Directory that receives the ``.md`` file. It must
            already exist; this function does not create it.
    """
    text = extract_text(pdf_path)
    # NOTE(review): markdownify is an HTML-to-Markdown converter, while the
    # input here is presumably plain text -- confirm this step is wanted.
    markdown_text = md(text)
    # splitext swaps only the final extension, whatever its case. The previous
    # str.replace('.pdf', '.md') left 'REPORT.PDF' untouched (so Markdown was
    # written to a file named *.PDF, clobbering the source when input and
    # output directories coincide) and also rewrote '.pdf' in mid-name.
    stem, _ext = os.path.splitext(os.path.basename(pdf_path))
    output_path = os.path.join(output_dir, stem + '.md')
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(markdown_text)
    click.echo(f"Converted {pdf_path} -> {output_path}")
# CLI entry point: INPUT_DIR is scanned (non-recursively) for names ending in
# ".pdf", case-insensitively; each match is handed to pdf_to_markdown, which
# writes its result into OUTPUT_DIR.
@click.command()
# Let click reject a missing or non-directory INPUT_DIR with a usage error
# instead of an os.listdir traceback.
@click.argument('input_dir', type=click.Path(exists=True, file_okay=False))
@click.argument('output_dir')
def convert_directory(input_dir, output_dir):
    """Convert all PDF files in a directory to Markdown."""
    # The docstring above doubles as click's --help text, so it is kept short.
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)
    # os.listdir order is arbitrary; sort so runs are reproducible.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(input_dir, file_name)
        # A sub-directory may also be named "*.pdf"; only regular files
        # can be converted.
        if not os.path.isfile(pdf_path):
            continue
        pdf_to_markdown(pdf_path, output_dir)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    convert_directory()