imag/migrations/3.1.1-case-sensitive-ocr.py
2025-01-30 22:43:32 -06:00

46 lines
1.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""ocr case-sensitivity migration"""
import os
import sqlite3
import sys
from warnings import filterwarnings as filter_warnings
import PIL
import pytesseract # type: ignore
def _sql_quote(conn: sqlite3.Connection, value: str) -> str:
    """Return `value` rendered as an SQL literal by SQLite's own quote()."""
    return conn.execute("SELECT quote(?);", (value,)).fetchone()[0]


def main() -> int:
    """Print the SQL script for the 3.1.1 OCR case-sensitivity migration.

    Command-line arguments (read from ``sys.argv``):
        1. images directory -- every entry in it is opened as an image;
        2. max ocr size -- maximum number of OCR characters kept per image.

    For each directory entry the image is run through Tesseract, the text is
    stripped, truncated and stripped again, and one
    ``UPDATE image SET ocr=... WHERE iid=...;`` statement is written to
    stdout.  All statements are wrapped in a single transaction.

    Returns:
        0 on success, 1 when the arguments are missing or invalid.
    """
    usage: str = f"Usage: {sys.argv[0]} <images directory> <max ocr size>"

    if len(sys.argv) < 3:
        print(usage, file=sys.stderr)
        return 1

    # A non-numeric size used to crash with a traceback, and a negative one
    # would silently slice from the end of the text; reject both up front.
    try:
        max_size: int = int(sys.argv[2])
    except ValueError:
        max_size = -1
    if max_size < 0:
        print(f"Invalid max ocr size: {sys.argv[2]!r}", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 1

    images_dir: str = sys.argv[1]

    print("-- Migration for version 3.1.1: OCR case sensitivity")
    print("BEGIN TRANSACTION;")

    # The in-memory database exists only to reach SQLite's quote() function.
    conn: sqlite3.Connection = sqlite3.connect(":memory:")
    try:
        # os.listdir() returns bare names in arbitrary order; sorting makes
        # the generated script reproducible between runs.
        for image in sorted(os.listdir(images_dir)):
            # NOTE(review): `import PIL` alone does not load the `PIL.Image`
            # submodule; presumably this works because pytesseract imports
            # it -- confirm, or import `PIL.Image` explicitly.
            with PIL.Image.open(os.path.join(images_dir, image)) as img:  # type: ignore
                text: str = str(pytesseract.image_to_string(img))  # type: ignore
                ocr: str = text.strip()[:max_size].strip()

            ocr_sql: str = _sql_quote(conn, ocr)

            # Purely numeric names are emitted verbatim, as before.  Any
            # other name is quoted so it cannot produce broken or injected
            # SQL.  Assumes iid normally equals the numeric file name --
            # TODO confirm against the schema.
            if image.isascii() and image.isdigit():
                iid_sql: str = image
            else:
                iid_sql = _sql_quote(conn, image)

            print(f"UPDATE image SET ocr={ocr_sql} WHERE iid={iid_sql};")
    finally:
        # sqlite3 connections are not closed automatically; release it even
        # when an image fails to open or OCR raises.
        conn.close()

    print("COMMIT;")
    return 0
if __name__ == "__main__":
    # Sanity check: the exit status below is taken from main(), so its
    # declared return type must be int.
    declared_return = main.__annotations__.get("return")
    assert declared_return is int, "main() should return an integer"

    # Promote every warning category to an exception before running.
    filter_warnings("error", category=Warning)

    # Use main()'s return value as the process exit status.
    sys.exit(main())