"""Celery tasks for OCR processing of uploaded screenshots.""" import asyncio import logging from uuid import UUID from app.workers.celery_app import celery_app logger = logging.getLogger(__name__) def _run_async(coro): """Run an async coroutine from synchronous Celery task context.""" loop = asyncio.new_event_loop() try: return loop.run_until_complete(coro) finally: loop.close() @celery_app.task(name="app.workers.ocr_tasks.process_image_ocr", bind=True, max_retries=3) def process_image_ocr(self, image_id: str): """Process a single image: classify page, extract fields, save to DB.""" _run_async(_process(image_id)) async def _process(image_id_str: str): from app.core.database import async_session_factory from app.models.evidence_image import EvidenceImage, OcrStatus from app.models.ocr_block import OcrBlock from app.services.ocr_service import classify_page, extract_transaction_fields from app.services.parser_service import parse_extracted_fields image_id = UUID(image_id_str) async with async_session_factory() as db: image = await db.get(EvidenceImage, image_id) if not image: logger.error("Image %s not found", image_id) return image.ocr_status = OcrStatus.processing await db.flush() try: source_app, page_type = await classify_page(image.file_path) image.source_app = source_app image.page_type = page_type raw_fields = await extract_transaction_fields(image.file_path, source_app, page_type) # save raw OCR block block = OcrBlock( image_id=image.id, content=str(raw_fields), bbox={}, seq_order=0, confidence=raw_fields.get("confidence", 0.5) if isinstance(raw_fields, dict) else 0.5, ) db.add(block) # parse into transaction records records = parse_extracted_fields(raw_fields, image.case_id, image.id, source_app) for r in records: db.add(r) image.ocr_status = OcrStatus.done await db.commit() logger.info("Image %s processed: %d transactions", image_id, len(records)) except Exception as e: image.ocr_status = OcrStatus.failed await db.commit() logger.error("Image %s OCR failed: %s", image_id, e) raise