Files
fund-tracer/backend/app/api/v1/images.py

185 lines
6.0 KiB
Python
Raw Normal View History

2026-03-11 16:28:04 +08:00
from uuid import UUID
2026-03-12 12:32:29 +08:00
import asyncio
2026-03-11 16:28:04 +08:00
2026-03-12 12:32:29 +08:00
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
2026-03-11 16:28:04 +08:00
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.database import get_db
2026-03-12 12:32:29 +08:00
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
2026-03-11 16:28:04 +08:00
from app.repositories.image_repo import ImageRepository
from app.repositories.case_repo import CaseRepository
2026-03-12 12:32:29 +08:00
from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection, CaseOcrStartIn
2026-03-11 16:28:04 +08:00
from app.utils.hash import sha256_file
from app.utils.file_storage import save_upload
router = APIRouter()
# Strong references to OCR tasks spawned by uploads. The event loop only keeps
# weak references to tasks, so a fire-and-forget task with no other reference
# may be garbage-collected before it finishes.
_upload_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)
async def upload_images(
    case_id: UUID,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded screenshots for a case and schedule OCR for pending ones.

    For every uploaded file the content hash is computed and looked up in the
    case (both the raw hash and the case-scoped hash are passed to the
    repository). When the repository returns a record, that record is reused;
    otherwise the file is saved and a new ``EvidenceImage`` is created.

    Args:
        case_id: Identifier of the case receiving the images.
        files: Uploaded files from the multipart request.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        One ``EvidenceImage`` per uploaded file, in upload order. The same
        record may appear more than once when duplicate files are uploaded.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    img_repo = ImageRepository(db)
    results: list[EvidenceImage] = []
    for f in files:
        data = await f.read()
        raw_hash = sha256_file(data)
        # Scope hash by case to avoid cross-case unique conflicts while still
        # deduplicating inside one case.
        scoped_hash = f"{raw_hash}:{case_id}"

        existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
        if existing:
            results.append(existing)
            continue

        file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")
        image = EvidenceImage(
            case_id=case_id,
            file_path=file_path,
            thumb_path=thumb_path,
            file_hash=scoped_hash,
            file_size=len(data),
        )
        image = await img_repo.create(image)
        results.append(image)

    case.image_count = await img_repo.count_by_case(case_id)
    await db.flush()

    # Trigger OCR tasks as in-process background work (non-blocking for the
    # API response). Imported here, as in the original, rather than at module
    # top level.
    from app.workers.ocr_tasks import process_image_ocr_async

    # ``results`` can contain the same record several times (duplicate files
    # in one request); schedule OCR at most once per image id.
    scheduled_ids: set[str] = set()
    for img in results:
        if img.ocr_status.value != "pending":
            continue
        image_id = str(img.id)
        if image_id in scheduled_ids:
            continue
        scheduled_ids.add(image_id)
        task = asyncio.create_task(process_image_ocr_async(image_id))
        # Keep the task alive until it completes, then drop the reference.
        _upload_ocr_tasks.add(task)
        task.add_done_callback(_upload_ocr_tasks.discard)

    return results
@router.get("/cases/{case_id}/images", response_model=list[ImageOut])
async def list_images(
    case_id: UUID,
    source_app: SourceApp | None = None,
    page_type: PageType | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Return the images of a case, optionally filtered by app and page type.

    Args:
        case_id: Identifier of the case whose images are listed.
        source_app: Optional filter forwarded to the repository.
        page_type: Optional filter forwarded to the repository.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A list of ``ImageOut`` items, one per record returned by the
        repository, in the repository's order.
    """
    rows = await ImageRepository(db).list_by_case(
        case_id,
        source_app=source_app,
        page_type=page_type,
    )

    items = []
    for row in rows:
        # The full-size URL and the thumbnail URL both point at the same
        # file endpoint.
        file_url = f"/api/v1/images/{row.id}/file"
        items.append(
            ImageOut(
                id=row.id,
                case_id=row.case_id,
                url=file_url,
                thumb_url=file_url,
                source_app=row.source_app,
                page_type=row.page_type,
                ocr_status=row.ocr_status,
                file_hash=row.file_hash,
                uploaded_at=row.uploaded_at,
            )
        )
    return items
2026-03-11 16:28:04 +08:00
@router.get("/images/{image_id}", response_model=ImageDetailOut)
async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return one image together with its OCR blocks.

    Args:
        image_id: Identifier of the image to load.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        An ``ImageDetailOut`` carrying the image fields plus one dict per
        entry of ``image.ocr_blocks``.

    Raises:
        HTTPException: 404 when the image does not exist.
    """

    def _block_payload(block):
        # Plain-dict view of a single OCR block for the response body.
        return {
            "id": block.id,
            "content": block.content,
            "bbox": block.bbox,
            "seq_order": block.seq_order,
            "confidence": block.confidence,
        }

    record = await ImageRepository(db).get(image_id)
    if not record:
        raise HTTPException(404, "截图不存在")

    return ImageDetailOut(
        id=record.id,
        case_id=record.case_id,
        # Both URLs point at the same file endpoint.
        url=f"/api/v1/images/{record.id}/file",
        thumb_url=f"/api/v1/images/{record.id}/file",
        source_app=record.source_app,
        page_type=record.page_type,
        ocr_status=record.ocr_status,
        file_hash=record.file_hash,
        uploaded_at=record.uploaded_at,
        ocr_blocks=list(map(_block_payload, record.ocr_blocks)),
    )
@router.patch("/images/{image_id}/ocr")
async def correct_ocr(
    image_id: UUID,
    corrections: list[OcrFieldCorrection],
    db: AsyncSession = Depends(get_db),
):
    """Acknowledge OCR field corrections submitted for an image.

    Args:
        image_id: Identifier of the image being corrected.
        corrections: Corrections sent by the client.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A dict with a confirmation message and the number of corrections
        received.

    Raises:
        HTTPException: 404 when the image does not exist.
    """
    # NOTE(review): this handler only checks that the image exists and counts
    # the corrections; nothing here writes them to the database. Confirm
    # whether persistence is handled elsewhere or still to be implemented.
    target = await ImageRepository(db).get(image_id)
    if target:
        return {"message": "修正已保存", "corrections": len(corrections)}
    raise HTTPException(404, "截图不存在")
@router.get("/images/{image_id}/file")
async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Serve the stored file of an image.

    Args:
        image_id: Identifier of the image whose file is requested.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A ``FileResponse`` for ``settings.upload_path / image.file_path``.

    Raises:
        HTTPException: 404 when the image record does not exist, or when the
            stored path does not point to a regular file.
    """
    repo = ImageRepository(db)
    image = await repo.get(image_id)
    if not image:
        raise HTTPException(404, "截图不存在")

    full_path = settings.upload_path / image.file_path
    # ``is_file()`` rather than ``exists()``: a path that resolves to a
    # directory (e.g. an empty ``file_path`` yields the upload root) exists,
    # but FileResponse cannot serve it and would fail instead of giving a 404.
    if not full_path.is_file():
        raise HTTPException(404, "文件不存在")
    return FileResponse(full_path)
2026-03-12 12:32:29 +08:00
# Strong references to OCR tasks started via the case-level endpoint. The
# event loop only keeps weak references to tasks, so a fire-and-forget task
# with no other reference may be garbage-collected before it finishes.
_case_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Submit OCR tasks for images of a case.

    When the payload names explicit ``image_ids``, those images (restricted
    to the case by the repository) are marked ``OcrStatus.processing`` and
    committed before the tasks are started. Otherwise the candidates come
    from ``ImageRepository.list_for_ocr`` with the payload's ``include_done``
    flag (``False`` when no payload is sent).

    Args:
        case_id: Identifier of the case.
        payload: Optional request body with ``image_ids`` and
            ``include_done``.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        A dict with ``caseId``, ``submitted``, ``totalCandidates`` and a
        human-readable ``message``.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []

    if image_ids:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
        # For explicit re-run, mark selected images as processing immediately
        # so frontend can reflect state transition without full page refresh.
        for img in images:
            img.ocr_status = OcrStatus.processing
        # Read the ids before committing: depending on session configuration
        # a commit can expire loaded attributes, and reloading them lazily is
        # not possible from async code.
        target_ids = [str(img.id) for img in images]
        # commit() flushes pending changes itself; no separate flush needed.
        await db.commit()
    else:
        images = await repo.list_for_ocr(case_id, include_done=include_done)
        target_ids = [str(img.id) for img in images]

    # Imported here, as in the original, rather than at module top level.
    from app.workers.ocr_tasks import process_image_ocr_async

    for target_id in target_ids:
        task = asyncio.create_task(process_image_ocr_async(target_id))
        # Keep the task alive until it completes, then drop the reference.
        _case_ocr_tasks.add(task)
        task.add_done_callback(_case_ocr_tasks.discard)

    submitted = len(target_ids)
    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }