Files
fund-tracer/backend/app/api/v1/images.py
2026-03-12 12:32:29 +08:00

185 lines
6.0 KiB
Python

from uuid import UUID
import asyncio
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.database import get_db
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
from app.repositories.image_repo import ImageRepository
from app.repositories.case_repo import CaseRepository
from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection, CaseOcrStartIn
from app.utils.hash import sha256_file
from app.utils.file_storage import save_upload
# Router on which the evidence-image endpoints defined in this module are registered.
router = APIRouter()
# Strong references to OCR tasks started by uploads. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_upload_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)
async def upload_images(
    case_id: UUID,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded screenshots for a case and queue OCR for pending ones.

    Each file is hashed; a file whose hash is already stored for this case is
    not saved again and the existing record is returned in its place. The
    case's image count is refreshed afterwards.

    Args:
        case_id: Identifier of the case the images belong to.
        files: Uploaded image files.
        db: Database session.

    Returns:
        One image record per uploaded file, in upload order.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    img_repo = ImageRepository(db)
    results: list[EvidenceImage] = []
    for f in files:
        data = await f.read()
        raw_hash = sha256_file(data)
        # Scope hash by case to avoid cross-case unique conflicts while still
        # deduplicating inside one case. Both the raw and the scoped form are
        # passed to the lookup.
        scoped_hash = f"{raw_hash}:{case_id}"
        existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
        if existing:
            results.append(existing)
            continue
        file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")
        image = EvidenceImage(
            case_id=case_id,
            file_path=file_path,
            thumb_path=thumb_path,
            file_hash=scoped_hash,
            file_size=len(data),
        )
        image = await img_repo.create(image)
        results.append(image)

    case.image_count = await img_repo.count_by_case(case_id)
    await db.flush()

    # Trigger OCR in-process in the background (non-blocking for the API response).
    # NOTE(review): tasks are scheduled after flush() but no commit is issued
    # here; confirm the worker can already see the new rows when it runs.
    # Function-scope import kept from the original (presumably avoids a
    # circular import - confirm).
    from app.workers.ocr_tasks import process_image_ocr_async

    # `results` can contain the same record more than once (several uploaded
    # files matching one stored image); schedule OCR once per image.
    queued_ids = set()
    for img in results:
        if img.ocr_status.value != "pending" or img.id in queued_ids:
            continue
        queued_ids.add(img.id)
        task = asyncio.create_task(process_image_ocr_async(str(img.id)))
        # Hold the task until it completes, then let it go.
        _upload_ocr_tasks.add(task)
        task.add_done_callback(_upload_ocr_tasks.discard)
    return results
@router.get("/cases/{case_id}/images", response_model=list[ImageOut])
async def list_images(
    case_id: UUID,
    source_app: SourceApp | None = None,
    page_type: PageType | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List the images of a case, optionally filtered by source app and page type.

    Args:
        case_id: Identifier of the case whose images are listed.
        source_app: Optional source-app filter passed to the repository.
        page_type: Optional page-type filter passed to the repository.
        db: Database session.

    Returns:
        One ``ImageOut`` per image returned by the repository.
    """
    rows = await ImageRepository(db).list_by_case(
        case_id,
        source_app=source_app,
        page_type=page_type,
    )

    listing = []
    for row in rows:
        # The thumbnail URL points at the same file endpoint as the full image.
        file_url = f"/api/v1/images/{row.id}/file"
        listing.append(
            ImageOut(
                id=row.id,
                case_id=row.case_id,
                url=file_url,
                thumb_url=file_url,
                source_app=row.source_app,
                page_type=row.page_type,
                ocr_status=row.ocr_status,
                file_hash=row.file_hash,
                uploaded_at=row.uploaded_at,
            )
        )
    return listing
@router.get("/images/{image_id}", response_model=ImageDetailOut)
async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return one image together with its OCR text blocks.

    Args:
        image_id: Identifier of the image.
        db: Database session.

    Returns:
        An ``ImageDetailOut`` carrying the image fields and its OCR blocks.

    Raises:
        HTTPException: 404 when the image does not exist.
    """
    record = await ImageRepository(db).get(image_id)
    if not record:
        raise HTTPException(404, "截图不存在")

    # The thumbnail URL points at the same file endpoint as the full image.
    file_url = f"/api/v1/images/{record.id}/file"

    block_dicts = []
    for block in record.ocr_blocks:
        block_dicts.append(
            {
                "id": block.id,
                "content": block.content,
                "bbox": block.bbox,
                "seq_order": block.seq_order,
                "confidence": block.confidence,
            }
        )

    return ImageDetailOut(
        id=record.id,
        case_id=record.case_id,
        url=file_url,
        thumb_url=file_url,
        source_app=record.source_app,
        page_type=record.page_type,
        ocr_status=record.ocr_status,
        file_hash=record.file_hash,
        uploaded_at=record.uploaded_at,
        ocr_blocks=block_dicts,
    )
@router.patch("/images/{image_id}/ocr")
async def correct_ocr(
    image_id: UUID,
    corrections: list[OcrFieldCorrection],
    db: AsyncSession = Depends(get_db),
):
    """Accept OCR field corrections for an existing image.

    Args:
        image_id: Identifier of the image being corrected.
        corrections: Corrections submitted by the client.
        db: Database session.

    Returns:
        A confirmation message and the number of corrections received.

    Raises:
        HTTPException: 404 when the image does not exist.
    """
    target = await ImageRepository(db).get(image_id)
    if not target:
        raise HTTPException(404, "截图不存在")

    # TODO(review): the corrections are only counted here; nothing in this
    # handler writes them to the database even though the message reports
    # them as saved. Persisting them still needs to be implemented.
    received = len(corrections)
    return {"message": "修正已保存", "corrections": received}
@router.get("/images/{image_id}/file")
async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Serve the stored file of an image.

    Args:
        image_id: Identifier of the image.
        db: Database session.

    Returns:
        A ``FileResponse`` for the file at the image's stored path under the
        configured upload directory.

    Raises:
        HTTPException: 404 when the image record does not exist, or when its
            stored path does not point at a regular file.
    """
    repo = ImageRepository(db)
    image = await repo.get(image_id)
    if not image:
        raise HTTPException(404, "截图不存在")

    full_path = settings.upload_path / image.file_path
    # is_file() instead of exists(): a path that resolves to a directory
    # (e.g. an empty stored file_path yields the upload root itself) passes
    # exists() but makes FileResponse fail while sending, which surfaces as a
    # 500 instead of the intended 404.
    if not full_path.is_file():
        raise HTTPException(404, "文件不存在")
    return FileResponse(full_path)
# Strong references to OCR tasks started on request. The event loop only keeps
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_case_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Submit background OCR tasks for images of a case.

    When the payload names specific image ids, those images (restricted to
    this case) are marked as processing and committed before the tasks start.
    Otherwise the repository selects the candidates, honouring the payload's
    ``include_done`` flag (``False`` when no payload is sent).

    Args:
        case_id: Identifier of the case.
        payload: Optional request body with ``image_ids`` and ``include_done``.
        db: Database session.

    Returns:
        A dict with the case id, the number of submitted tasks, the number of
        candidate images and a human-readable message.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []
    if image_ids:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
        # For explicit re-run, mark selected images as processing immediately
        # so frontend can reflect state transition without full page refresh.
        for img in images:
            img.ocr_status = OcrStatus.processing
        await db.flush()
        await db.commit()
        # NOTE(review): img.id is read after commit() below; this assumes the
        # session does not expire attributes on commit - confirm.
    else:
        images = await repo.list_for_ocr(case_id, include_done=include_done)

    # Function-scope import kept from the original (presumably avoids a
    # circular import - confirm).
    from app.workers.ocr_tasks import process_image_ocr_async

    for img in images:
        task = asyncio.create_task(process_image_ocr_async(str(img.id)))
        # Hold the task until it completes, then let it go.
        _case_ocr_tasks.add(task)
        task.add_done_callback(_case_ocr_tasks.discard)

    # Every candidate image gets exactly one task.
    submitted = len(images)
    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }