Code snippets

2-7-25 from ChatGPT


import os
import click
from pdfminer.high_level import extract_text
from markdownify import markdownify as md

def pdf_to_markdown(pdf_path, output_dir):
    """Convert a PDF file to Markdown and save it.

    The text of ``pdf_path`` is extracted with ``extract_text``, passed
    through ``md`` and written as UTF-8 into ``output_dir``. The output file
    keeps the PDF's base name with its final extension replaced by ``.md``.

    Args:
        pdf_path: Path to the source PDF file.
        output_dir: Directory that receives the Markdown file; it must
            already exist.
    """
    text = extract_text(pdf_path)
    # NOTE(review): markdownify is normally fed HTML, while extract_text
    # appears to return plain text -- confirm this pass is intended.
    markdown_text = md(text)

    # Swap only the final extension, whatever its case. A plain
    # str.replace('.pdf', '.md') leaves 'REPORT.PDF' unchanged (so the output
    # could overwrite the source PDF when both directories are the same) and
    # also rewrites '.pdf' occurring in the middle of a file name.
    stem, _ = os.path.splitext(os.path.basename(pdf_path))
    output_path = os.path.join(output_dir, stem + '.md')

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(markdown_text)

    click.echo(f"Converted {pdf_path} -> {output_path}")

# CLI entry point: INPUT_DIR must be an existing directory (click rejects
# anything else with a usage error instead of a traceback); OUTPUT_DIR is
# created on demand and may not be an existing regular file.
@click.command()
@click.argument('input_dir', type=click.Path(exists=True, file_okay=False))
@click.argument('output_dir', type=click.Path(file_okay=False))
def convert_directory(input_dir, output_dir):
    """Convert all PDF files in a directory to Markdown."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Sort so files are processed (and reported) in a reproducible order;
    # os.listdir makes no ordering guarantee.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(input_dir, file_name)
        # Skip sub-directories that merely happen to be named '*.pdf'.
        if os.path.isfile(pdf_path):
            pdf_to_markdown(pdf_path, output_dir)

# Run the click command only when this file is executed as a script, not
# when it is imported; click reads the arguments from the command line.
if __name__ == '__main__':
    convert_directory()