Files
fund-tracer/backend/app/api/v1/images.py
2026-03-13 09:57:04 +08:00

273 lines
8.9 KiB
Python

from uuid import UUID
import asyncio
from sqlalchemy import delete, select
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.database import get_db
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
from app.repositories.image_repo import ImageRepository
from app.repositories.case_repo import CaseRepository
from app.schemas.image import (
ImageOut,
ImageDetailOut,
OcrFieldCorrection,
CaseOcrStartIn,
CaseImagesDeleteIn,
)
from app.utils.hash import sha256_file
from app.utils.file_storage import save_upload
# Router on which every endpoint defined in this module registers itself.
router = APIRouter()
# Strong references to in-flight OCR tasks started by uploads. The event loop
# keeps only weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_UPLOAD_OCR_TASKS: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)
async def upload_images(
    case_id: UUID,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded evidence images for a case and queue OCR for pending ones.

    Every file is hashed; a file whose hash is already registered for the case
    is not stored again and the existing record is returned in its place.

    Args:
        case_id: Case the images belong to.
        files: Uploaded files from the multipart request.
        db: Request-scoped database session.

    Returns:
        One record per uploaded file, in upload order.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    img_repo = ImageRepository(db)
    results: list[EvidenceImage] = []
    for f in files:
        data = await f.read()
        raw_hash = sha256_file(data)
        # Scope hash by case to avoid cross-case unique conflicts while still
        # deduplicating inside one case.
        scoped_hash = f"{raw_hash}:{case_id}"
        # Both forms are looked up — presumably so rows stored with an
        # unscoped hash still match; confirm against existing data.
        existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
        if existing:
            results.append(existing)
            continue

        file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")
        image = EvidenceImage(
            case_id=case_id,
            file_path=file_path,
            thumb_path=thumb_path,
            file_hash=scoped_hash,
            file_size=len(data),
        )
        image = await img_repo.create(image)
        results.append(image)

    case.image_count = await img_repo.count_by_case(case_id)
    await db.flush()

    # Trigger OCR in an in-process background task (non-blocking for the API
    # response). Imported here rather than at module level, as in the original.
    from app.workers.ocr_tasks import process_images_ocr_batch_async

    # The same record can appear several times in ``results`` (e.g. one file
    # uploaded twice in a single request); de-duplicate while keeping order so
    # each image is submitted to OCR only once.
    pending_ids = list(
        dict.fromkeys(
            str(img.id) for img in results if img.ocr_status.value == "pending"
        )
    )
    if pending_ids:
        # NOTE(review): the task is scheduled after flush(), not commit() —
        # confirm the worker cannot observe the rows before get_db commits.
        task = asyncio.create_task(
            process_images_ocr_batch_async(pending_ids, settings.OCR_PARALLELISM)
        )
        _UPLOAD_OCR_TASKS.add(task)
        task.add_done_callback(_UPLOAD_OCR_TASKS.discard)
    return results
@router.get("/cases/{case_id}/images", response_model=list[ImageOut])
async def list_images(
    case_id: UUID,
    source_app: SourceApp | None = None,
    page_type: PageType | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List the images of one case, optionally filtered by source app and page type.

    ``url`` and ``thumb_url`` of every entry both point at the same
    ``/api/v1/images/{id}/file`` path.
    """
    rows = await ImageRepository(db).list_by_case(
        case_id,
        source_app=source_app,
        page_type=page_type,
    )

    listing = []
    for row in rows:
        listing.append(
            ImageOut(
                id=row.id,
                case_id=row.case_id,
                url=f"/api/v1/images/{row.id}/file",
                thumb_url=f"/api/v1/images/{row.id}/file",
                source_app=row.source_app,
                page_type=row.page_type,
                ocr_status=row.ocr_status,
                file_hash=row.file_hash,
                uploaded_at=row.uploaded_at,
            )
        )
    return listing
@router.get("/images/{image_id}", response_model=ImageDetailOut)
async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return one image together with its OCR blocks.

    Raises:
        HTTPException: 404 when no image with ``image_id`` is found.
    """
    record = await ImageRepository(db).get(image_id)
    if not record:
        raise HTTPException(404, "截图不存在")

    # Each OCR block is flattened into a plain dict with a fixed set of keys.
    block_dicts = []
    for block in record.ocr_blocks:
        block_dicts.append(
            dict(
                id=block.id,
                content=block.content,
                bbox=block.bbox,
                seq_order=block.seq_order,
                confidence=block.confidence,
            )
        )

    return ImageDetailOut(
        id=record.id,
        case_id=record.case_id,
        url=f"/api/v1/images/{record.id}/file",
        thumb_url=f"/api/v1/images/{record.id}/file",
        source_app=record.source_app,
        page_type=record.page_type,
        ocr_status=record.ocr_status,
        file_hash=record.file_hash,
        uploaded_at=record.uploaded_at,
        ocr_blocks=block_dicts,
    )
@router.patch("/images/{image_id}/ocr")
async def correct_ocr(
    image_id: UUID,
    corrections: list[OcrFieldCorrection],
    db: AsyncSession = Depends(get_db),
):
    """Acknowledge a list of OCR field corrections for an image.

    As written, this handler only checks that the image exists and reports how
    many corrections were received; it does not write them anywhere.

    Raises:
        HTTPException: 404 when no image with ``image_id`` is found.
    """
    target = await ImageRepository(db).get(image_id)
    if target:
        received = len(corrections)
        return {"message": "修正已保存", "corrections": received}
    raise HTTPException(404, "截图不存在")
@router.get("/images/{image_id}/file")
async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Serve the stored file of an image.

    Args:
        image_id: Identifier of the image record.
        db: Request-scoped database session.

    Returns:
        A ``FileResponse`` for the file at ``settings.upload_path / file_path``.

    Raises:
        HTTPException: 404 when the image record is missing, or when its
            stored path does not resolve to a regular file.
    """
    repo = ImageRepository(db)
    image = await repo.get(image_id)
    if not image:
        raise HTTPException(404, "截图不存在")

    full_path = settings.upload_path / image.file_path
    # is_file() rather than exists(): a path that resolves to a directory
    # (e.g. an empty file_path) would pass exists() and then make FileResponse
    # fail while sending, instead of yielding a clean 404.
    if not full_path.is_file():
        raise HTTPException(404, "文件不存在")
    return FileResponse(full_path)
# Strong references to in-flight OCR tasks started on demand for a case. The
# event loop keeps only weak references to tasks, so a task nobody else
# references may be garbage-collected before it finishes.
_CASE_OCR_TASKS: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Mark images of a case as processing and submit them to background OCR.

    When the payload lists ``image_ids``, those images (restricted to the
    case) are selected; otherwise the candidates come from
    ``ImageRepository.list_for_ocr``, honouring ``include_done``.

    Args:
        case_id: Case whose images should be processed.
        payload: Optional selection; ``None`` behaves like an empty selection
            with ``include_done=False``.
        db: Request-scoped database session.

    Returns:
        A dict with ``caseId``, ``submitted``, ``totalCandidates`` and a
        human-readable ``message``.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []
    if image_ids:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
    else:
        images = await repo.list_for_ocr(case_id, include_done=include_done)

    # Capture the IDs before committing: if the session expires objects on
    # commit, reading attributes afterwards would trigger implicit IO, which
    # an AsyncSession does not allow.
    image_ids_to_run = [str(img.id) for img in images]

    # Mark the selected images as processing immediately (for explicit re-runs
    # as well as workspace-triggered runs) so the frontend can reflect the
    # state transition without a full page refresh.
    for img in images:
        img.ocr_status = OcrStatus.processing
    # commit() flushes pending changes itself, so no separate flush is needed.
    await db.commit()

    from app.workers.ocr_tasks import process_images_ocr_batch_async

    submitted = len(image_ids_to_run)
    if image_ids_to_run:
        task = asyncio.create_task(
            process_images_ocr_batch_async(image_ids_to_run, settings.OCR_PARALLELISM)
        )
        _CASE_OCR_TASKS.add(task)
        task.add_done_callback(_CASE_OCR_TASKS.discard)

    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }
@router.delete("/cases/{case_id}/images")
async def delete_case_images(
    case_id: UUID,
    payload: CaseImagesDeleteIn,
    db: AsyncSession = Depends(get_db),
):
    """Delete selected images of a case together with their derived records.

    Fraud assessments, OCR blocks and transaction records that hang off the
    selected images are removed first, then the image rows, and the case's
    ``image_count`` is refreshed. Stored files are removed only after the
    database commit has succeeded, so a failed commit never leaves rows that
    point at files which are already gone.

    Args:
        case_id: Case that owns the images.
        payload: Carries ``image_ids``, the images to delete.
        db: Request-scoped database session.

    Returns:
        A dict with ``caseId``, ``deleted`` and a human-readable ``message``.

    Raises:
        HTTPException: 404 when the case does not exist.
        Exception: Any database error is re-raised after a rollback.
    """
    import logging

    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")
    if not payload.image_ids:
        return {"caseId": str(case_id), "deleted": 0, "message": "未选择需要删除的截图"}

    repo = ImageRepository(db)
    images = await repo.list_by_ids_in_case(case_id, payload.image_ids)
    if not images:
        return {"caseId": str(case_id), "deleted": 0, "message": "未找到可删除的截图"}

    from app.models.ocr_block import OcrBlock
    from app.models.transaction import TransactionRecord
    from app.models.assessment import FraudAssessment

    # Snapshot everything needed after the commit while the ORM objects are
    # still loaded.
    target_ids = [image.id for image in images]
    stored_paths = [
        rel
        for image in images
        for rel in (image.file_path, image.thumb_path)
        if rel
    ]

    try:
        # Assessments reference transaction records, so they must be deleted
        # before the transaction records themselves.
        await db.execute(
            delete(FraudAssessment).where(
                FraudAssessment.transaction_id.in_(
                    select(TransactionRecord.id).where(
                        TransactionRecord.evidence_image_id.in_(target_ids)
                    )
                )
            )
        )
        await db.execute(delete(OcrBlock).where(OcrBlock.image_id.in_(target_ids)))
        await db.execute(
            delete(TransactionRecord).where(
                TransactionRecord.evidence_image_id.in_(target_ids)
            )
        )
        for image in images:
            await repo.delete(image)

        case.image_count = await repo.count_by_case(case_id)
        await db.commit()
    except Exception:
        await db.rollback()
        raise

    deleted = len(target_ids)

    # Best-effort removal of the local files. The rows are already gone, so a
    # failure here is logged rather than turned into an error response.
    for rel in stored_paths:
        try:
            (settings.upload_path / rel).unlink(missing_ok=True)
        except Exception:
            logging.getLogger(__name__).warning(
                "Failed to remove stored file %s", rel, exc_info=True
            )

    return {
        "caseId": str(case_id),
        "deleted": deleted,
        "message": f"已删除 {deleted} 张截图",
    }