Files
fund-tracer/backend/app/workers/ocr_tasks.py
2026-03-11 16:28:04 +08:00

75 lines
2.4 KiB
Python

"""Celery tasks for OCR processing of uploaded screenshots."""
import asyncio
import logging
from uuid import UUID
from app.workers.celery_app import celery_app
logger = logging.getLogger(__name__)
def _run_async(coro):
    """Run an async coroutine from synchronous Celery task context.

    A fresh event loop is created for the call and closed afterwards.
    Before the loop is closed, any task that *coro* spawned but did not
    await is cancelled and allowed to unwind, and outstanding async
    generators are finalized. Without that step such tasks would be
    abandoned mid-flight and their ``except``/``finally`` cleanup would
    never execute.

    Args:
        coro: The coroutine object (or other awaitable) to execute.

    Returns:
        The value produced by *coro*.

    Raises:
        Exception: Whatever *coro* raises is propagated to the caller.
    """
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        try:
            # all_tasks() only reports tasks that are not done yet, so the
            # task that wrapped *coro* itself is never part of this set.
            leftovers = asyncio.all_tasks(loop)
            for task in leftovers:
                task.cancel()
            if leftovers:
                # return_exceptions=True: the CancelledError of each task is
                # collected instead of being raised out of the cleanup.
                loop.run_until_complete(
                    asyncio.gather(*leftovers, return_exceptions=True)
                )
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Close the loop even if the cleanup above failed.
            loop.close()
@celery_app.task(name="app.workers.ocr_tasks.process_image_ocr", bind=True, max_retries=3)
def process_image_ocr(self, image_id: str):
    """Process a single image: classify page, extract fields, save to DB.

    The task is bound (``bind=True``) so that a failed attempt can be
    re-queued through ``self.retry``; ``max_retries=3`` on the decorator
    caps how many times that happens. Once the budget is exhausted Celery
    re-raises the original exception and the task is marked as failed.

    Args:
        image_id: String form of the image's UUID primary key.

    Raises:
        celery.exceptions.Retry: Signals the worker to re-queue the task.
        Exception: The underlying processing error once retries run out.
    """
    try:
        _run_async(_process(image_id))
    except Exception as exc:
        logger.warning(
            "Image %s OCR attempt %d failed; requesting retry",
            image_id,
            self.request.retries + 1,
        )
        # retry() raises Retry, or re-raises *exc* when max_retries is exceeded.
        raise self.retry(exc=exc)
async def _process(image_id_str: str):
    """Run OCR for one evidence image and persist the outcome.

    Steps, all inside a single database session:

    1. Load the ``EvidenceImage`` row; log an error and return if it is missing.
    2. Set its status to ``processing`` and flush.
    3. Classify the page, extract the raw fields, store them as one
       ``OcrBlock`` and add every record returned by
       ``parse_extracted_fields``.
    4. Set the status to ``done`` and commit.

    If any step after the flush fails, the transaction is rolled back so
    that no partially built rows are stored, the image is marked ``failed``
    in a fresh transaction, and the original exception is re-raised.

    Args:
        image_id_str: String form of the image's UUID primary key.

    Raises:
        ValueError: If *image_id_str* is not a valid UUID.
        Exception: Any error raised while classifying, extracting, parsing
            or committing is re-raised after the failure status is recorded.
    """
    # Imported lazily inside the function, as in the original layout.
    from app.core.database import async_session_factory
    from app.models.evidence_image import EvidenceImage, OcrStatus
    from app.models.ocr_block import OcrBlock
    from app.services.ocr_service import classify_page, extract_transaction_fields
    from app.services.parser_service import parse_extracted_fields

    image_id = UUID(image_id_str)

    async with async_session_factory() as db:
        image = await db.get(EvidenceImage, image_id)
        if image is None:
            logger.error("Image %s not found", image_id)
            return

        # Flushed, not committed: the status change is sent to the database
        # but stays inside the transaction that is still open.
        image.ocr_status = OcrStatus.processing
        await db.flush()

        try:
            source_app, page_type = await classify_page(image.file_path)
            image.source_app = source_app
            image.page_type = page_type

            raw_fields = await extract_transaction_fields(image.file_path, source_app, page_type)

            # save raw OCR block
            if isinstance(raw_fields, dict):
                confidence = raw_fields.get("confidence", 0.5)
            else:
                confidence = 0.5
            block = OcrBlock(
                image_id=image.id,
                content=str(raw_fields),
                bbox={},
                seq_order=0,
                confidence=confidence,
            )
            db.add(block)

            # parse into transaction records
            records = parse_extracted_fields(raw_fields, image.case_id, image.id, source_app)
            for record in records:
                db.add(record)

            image.ocr_status = OcrStatus.done
            await db.commit()
            logger.info("Image %s processed: %d transactions", image_id, len(records))
        except Exception as e:
            # logger.exception keeps the traceback alongside the message.
            logger.exception("Image %s OCR failed: %s", image_id, e)
            try:
                # Drop everything staged so far (OCR block, parsed records,
                # partial classification) so that only the failure status is
                # written. The rollback also clears a session whose flush or
                # commit was the thing that failed.
                await db.rollback()
                # Re-load the row: instances are expired by the rollback.
                failed_image = await db.get(EvidenceImage, image_id)
                if failed_image is not None:
                    failed_image.ocr_status = OcrStatus.failed
                    await db.commit()
            except Exception:
                # Never let the bookkeeping hide the original error.
                logger.exception(
                    "Image %s: could not persist failed OCR status", image_id
                )
            raise