fix ocr
This commit is contained in:
@@ -1,15 +1,16 @@
|
||||
from uuid import UUID
|
||||
import asyncio
|
||||
|
||||
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException, Query
|
||||
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import get_db
|
||||
from app.models.evidence_image import EvidenceImage, SourceApp, PageType
|
||||
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
|
||||
from app.repositories.image_repo import ImageRepository
|
||||
from app.repositories.case_repo import CaseRepository
|
||||
from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection
|
||||
from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection, CaseOcrStartIn
|
||||
from app.utils.hash import sha256_file
|
||||
from app.utils.file_storage import save_upload
|
||||
|
||||
@@ -32,9 +33,11 @@ async def upload_images(
|
||||
|
||||
for f in files:
|
||||
data = await f.read()
|
||||
file_hash = sha256_file(data)
|
||||
raw_hash = sha256_file(data)
|
||||
# Scope hash by case to avoid cross-case unique conflicts while still deduplicating inside one case.
|
||||
scoped_hash = f"{raw_hash}:{case_id}"
|
||||
|
||||
existing = await img_repo.find_by_hash(file_hash)
|
||||
existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
|
||||
if existing:
|
||||
results.append(existing)
|
||||
continue
|
||||
@@ -44,7 +47,7 @@ async def upload_images(
|
||||
case_id=case_id,
|
||||
file_path=file_path,
|
||||
thumb_path=thumb_path,
|
||||
file_hash=file_hash,
|
||||
file_hash=scoped_hash,
|
||||
file_size=len(data),
|
||||
)
|
||||
image = await img_repo.create(image)
|
||||
@@ -53,14 +56,11 @@ async def upload_images(
|
||||
case.image_count = await img_repo.count_by_case(case_id)
|
||||
await db.flush()
|
||||
|
||||
# trigger OCR tasks (non-blocking)
|
||||
from app.workers.ocr_tasks import process_image_ocr
|
||||
# trigger OCR tasks in-process background (non-blocking for API response)
|
||||
from app.workers.ocr_tasks import process_image_ocr_async
|
||||
for img in results:
|
||||
if img.ocr_status.value == "pending":
|
||||
try:
|
||||
process_image_ocr.delay(str(img.id))
|
||||
except Exception:
|
||||
pass
|
||||
asyncio.create_task(process_image_ocr_async(str(img.id)))
|
||||
|
||||
return results
|
||||
|
||||
@@ -73,7 +73,21 @@ async def list_images(
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
repo = ImageRepository(db)
|
||||
return await repo.list_by_case(case_id, source_app=source_app, page_type=page_type)
|
||||
images = await repo.list_by_case(case_id, source_app=source_app, page_type=page_type)
|
||||
return [
|
||||
ImageOut(
|
||||
id=img.id,
|
||||
case_id=img.case_id,
|
||||
url=f"/api/v1/images/{img.id}/file",
|
||||
thumb_url=f"/api/v1/images/{img.id}/file",
|
||||
source_app=img.source_app,
|
||||
page_type=img.page_type,
|
||||
ocr_status=img.ocr_status,
|
||||
file_hash=img.file_hash,
|
||||
uploaded_at=img.uploaded_at,
|
||||
)
|
||||
for img in images
|
||||
]
|
||||
|
||||
|
||||
@router.get("/images/{image_id}", response_model=ImageDetailOut)
|
||||
@@ -128,3 +142,43 @@ async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
|
||||
if not full_path.exists():
|
||||
raise HTTPException(404, "文件不存在")
|
||||
return FileResponse(full_path)
|
||||
|
||||
|
||||
# Strong references to in-flight OCR tasks started by ``start_case_ocr``.
# The event loop only keeps weak references to tasks, so a fire-and-forget
# task whose result is discarded may be garbage-collected mid-execution.
_case_ocr_tasks: set[asyncio.Task] = set()


def _spawn_case_ocr_task(coro) -> asyncio.Task:
    """Schedule *coro* as a background task and keep it referenced until done."""
    task = asyncio.create_task(coro)
    _case_ocr_tasks.add(task)
    # Drop the reference once the task finishes so the set does not grow.
    task.add_done_callback(_case_ocr_tasks.discard)
    return task


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Submit background OCR tasks for images belonging to a case.

    When ``payload.image_ids`` is non-empty, the images returned by
    ``list_by_ids_in_case`` are marked ``OcrStatus.processing`` and the
    change is committed before the tasks are scheduled. Otherwise the
    candidates come from ``list_for_ocr`` with ``payload.include_done``
    (``False`` when no payload is sent).

    Args:
        case_id: Identifier of the case whose images should be processed.
        payload: Optional request body carrying ``image_ids`` and
            ``include_done``.
        db: Request-scoped async database session.

    Returns:
        A dict with the keys ``caseId``, ``submitted``, ``totalCandidates``
        and ``message``.

    Raises:
        HTTPException: 404 when the case lookup returns nothing.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []
    if image_ids:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
        # For explicit re-run, mark selected images as processing immediately
        # so frontend can reflect state transition without full page refresh.
        for img in images:
            img.ocr_status = OcrStatus.processing
        await db.flush()
        await db.commit()
    else:
        images = await repo.list_for_ocr(case_id, include_done=include_done)

    # Imported at call time, as in the original code.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from app.workers.ocr_tasks import process_image_ocr_async

    submitted = 0
    for img in images:
        # Go through the helper so the task stays referenced until it
        # completes; a bare asyncio.create_task() result would be dropped.
        _spawn_case_ocr_task(process_image_ocr_async(str(img.id)))
        submitted += 1

    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }
|
||||
|
||||
Reference in New Issue
Block a user