import os
import subprocess

# Batch-extract text from every PDF in `ocrd_dir` using Poppler's pdftotext.
# One .txt file per PDF is written to `txt_output_dir`; PDFs whose .txt file
# already exists are skipped, so the script can be re-run incrementally.

# === CONFIGURATION ===
ocrd_dir = r"D:\temp\rfkfiles\OCRd"
txt_output_dir = r"D:\temp\rfkfiles\OCRd\txt"
poppler_path = r"D:\poppler-24.08.0\Library\bin"  # Update this to your actual path

# === SETUP ===
os.makedirs(txt_output_dir, exist_ok=True)
# The executable path does not change per file, so build it once.
pdftotext_exe = os.path.join(poppler_path, "pdftotext.exe")
converted = 0
skipped = 0
failed = 0

# === CONVERSION ===
for filename in os.listdir(ocrd_dir):
    if not filename.lower().endswith('.pdf'):
        continue

    base = os.path.splitext(filename)[0]
    txt_path = os.path.join(txt_output_dir, base + ".txt")

    # An existing .txt file is treated as "already converted".
    if os.path.exists(txt_path):
        skipped += 1
        continue

    pdf_path = os.path.join(ocrd_dir, filename)
    # Pass the arguments as a list and run without a shell, so file names
    # containing shell metacharacters (&, %, ^, ...) reach pdftotext verbatim
    # instead of being interpreted by the command interpreter.
    cmd = [pdftotext_exe, "-layout", pdf_path, txt_path]

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: pdftotext exited with a non-zero status.
        # OSError: the executable could not be started (e.g. wrong
        # poppler_path); without a shell this surfaces as an exception.
        print(f"❌ Failed: {filename} — {e}")
        failed += 1
        # Remove any output a failed run may have left behind; otherwise the
        # exists-check above would skip this PDF on the next run.
        if os.path.exists(txt_path):
            try:
                os.remove(txt_path)
            except OSError as cleanup_error:
                print(f"⚠️ Could not remove partial output: {txt_path} — {cleanup_error}")
    else:
        print(f"✅ Extracted: {filename}")
        converted += 1

print(f"\nDone. {converted} new .txt files created. {skipped} already existed.")
if failed:
    print(f"{failed} file(s) failed to convert.")
