# backend/app/api/v1/images.py

from uuid import UUID
import asyncio

from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import settings
from app.core.database import get_db
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
from app.repositories.image_repo import ImageRepository
from app.repositories.case_repo import CaseRepository
from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection, CaseOcrStartIn
from app.utils.hash import sha256_file
from app.utils.file_storage import save_upload

# Router on which the image and OCR endpoints defined in this module are registered.
router = APIRouter()


# Strong references to OCR tasks started by uploads. The event loop only keeps
# weak references to tasks, so a task whose handle is dropped can be garbage
# collected before it finishes.
_upload_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)
async def upload_images(
    case_id: UUID,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded screenshots for a case and schedule OCR for pending ones.

    Each file is hashed with ``sha256_file``; the stored hash is suffixed with
    the case id. A file whose raw or case-scoped hash is already present in the
    case is not stored again: the existing record is returned in its place.

    Args:
        case_id: Case the images belong to.
        files: Uploaded files from the multipart request.
        db: Request-scoped database session.

    Returns:
        One image record per uploaded file, in upload order. The same record
        may appear more than once when a file is repeated.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    img_repo = ImageRepository(db)
    results: list[EvidenceImage] = []

    for f in files:
        data = await f.read()
        raw_hash = sha256_file(data)
        # Scope hash by case to avoid cross-case unique conflicts while still
        # deduplicating inside one case.
        scoped_hash = f"{raw_hash}:{case_id}"

        # Both forms are looked up — presumably so rows stored with the
        # unscoped hash still match; confirm against the repository.
        existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
        if existing:
            results.append(existing)
            continue

        file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")
        image = EvidenceImage(
            case_id=case_id,
            file_path=file_path,
            thumb_path=thumb_path,
            file_hash=scoped_hash,
            file_size=len(data),
        )
        image = await img_repo.create(image)
        results.append(image)

    case.image_count = await img_repo.count_by_case(case_id)
    await db.flush()

    # Trigger OCR in-process so the API response is not blocked on it.
    # NOTE(review): the new rows are only flushed at this point; whether the
    # OCR task can already see them depends on when get_db commits — confirm.
    from app.workers.ocr_tasks import process_image_ocr_async

    scheduled_ids = set()
    for img in results:
        # A file repeated within one request yields the same record twice;
        # schedule OCR only once per image.
        if img.ocr_status.value != "pending" or img.id in scheduled_ids:
            continue
        scheduled_ids.add(img.id)
        task = asyncio.create_task(process_image_ocr_async(str(img.id)))
        _upload_ocr_tasks.add(task)
        task.add_done_callback(_upload_ocr_tasks.discard)

    return results


@router.get("/cases/{case_id}/images", response_model=list[ImageOut])
async def list_images(
    case_id: UUID,
    source_app: SourceApp | None = None,
    page_type: PageType | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List the images of a case, optionally filtered by source app and page type.

    Args:
        case_id: Case whose images are listed.
        source_app: Optional filter passed through to the repository.
        page_type: Optional filter passed through to the repository.
        db: Request-scoped database session.

    Returns:
        One ``ImageOut`` per image returned by the repository, in the same order.
    """
    rows = await ImageRepository(db).list_by_case(
        case_id, source_app=source_app, page_type=page_type
    )

    listing = []
    for row in rows:
        # Both the full image and the thumbnail point at the same file endpoint.
        file_url = f"/api/v1/images/{row.id}/file"
        listing.append(
            ImageOut(
                id=row.id,
                case_id=row.case_id,
                url=file_url,
                thumb_url=file_url,
                source_app=row.source_app,
                page_type=row.page_type,
                ocr_status=row.ocr_status,
                file_hash=row.file_hash,
                uploaded_at=row.uploaded_at,
            )
        )
    return listing


@router.get("/images/{image_id}", response_model=ImageDetailOut)
async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return one image together with its OCR text blocks.

    Args:
        image_id: Identifier of the image to fetch.
        db: Request-scoped database session.

    Returns:
        An ``ImageDetailOut`` carrying the image fields plus one dict per OCR
        block (id, content, bbox, seq_order, confidence).

    Raises:
        HTTPException: 404 when the image does not exist.
    """
    record = await ImageRepository(db).get(image_id)
    if not record:
        raise HTTPException(404, "截图不存在")

    # Both the full image and the thumbnail point at the same file endpoint.
    file_url = f"/api/v1/images/{record.id}/file"

    blocks = []
    for block in record.ocr_blocks:
        blocks.append(
            {
                "id": block.id,
                "content": block.content,
                "bbox": block.bbox,
                "seq_order": block.seq_order,
                "confidence": block.confidence,
            }
        )

    return ImageDetailOut(
        id=record.id,
        case_id=record.case_id,
        url=file_url,
        thumb_url=file_url,
        source_app=record.source_app,
        page_type=record.page_type,
        ocr_status=record.ocr_status,
        file_hash=record.file_hash,
        uploaded_at=record.uploaded_at,
        ocr_blocks=blocks,
    )


@router.patch("/images/{image_id}/ocr")
async def correct_ocr(
    image_id: UUID,
    corrections: list[OcrFieldCorrection],
    db: AsyncSession = Depends(get_db),
):
    """Acknowledge OCR corrections submitted for an image.

    NOTE(review): this handler only checks that the image exists and echoes
    back the number of corrections received. Nothing in this function writes
    the corrections anywhere, although the response message says they were
    saved — looks like a stub; confirm.

    Args:
        image_id: Identifier of the image being corrected.
        corrections: Corrections sent in the request body.
        db: Request-scoped database session.

    Returns:
        A dict with a confirmation message and the number of corrections received.

    Raises:
        HTTPException: 404 when the image does not exist.
    """
    image_repo = ImageRepository(db)
    target = await image_repo.get(image_id)
    if not target:
        raise HTTPException(404, "截图不存在")

    received = len(corrections)
    return {"message": "修正已保存", "corrections": received}


@router.get("/images/{image_id}/file")
async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Serve the stored file of an image.

    Args:
        image_id: Identifier of the image whose file is requested.
        db: Request-scoped database session.

    Returns:
        A ``FileResponse`` for the file at ``settings.upload_path / file_path``.

    Raises:
        HTTPException: 404 when the image record does not exist, or when the
            resolved path is not a regular file.
    """
    repo = ImageRepository(db)
    image = await repo.get(image_id)
    if not image:
        raise HTTPException(404, "截图不存在")
    full_path = settings.upload_path / image.file_path
    # is_file() rather than exists(): a path that exists but is a directory
    # (for example when file_path is empty) cannot be served as a file and
    # must be reported as missing instead of failing later.
    if not full_path.is_file():
        raise HTTPException(404, "文件不存在")
    return FileResponse(full_path)


# Strong references to OCR tasks started on request. The event loop only keeps
# weak references to tasks, so a task whose handle is dropped can be garbage
# collected before it finishes.
_case_ocr_tasks: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Schedule OCR for images of a case.

    When the payload names image ids, exactly those images (restricted to the
    case) are marked as processing, committed, and re-run. Otherwise the
    repository selects the candidates via ``list_for_ocr``, honouring
    ``include_done``.

    Args:
        case_id: Case whose images are processed.
        payload: Optional request body with ``image_ids`` and ``include_done``.
        db: Request-scoped database session.

    Returns:
        A dict with the case id, the number of submitted tasks, the number of
        candidate images, and a human-readable message.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []
    explicit_rerun = bool(image_ids)

    if explicit_rerun:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
    else:
        images = await repo.list_for_ocr(case_id, include_done=include_done)

    # Read the ids before committing: a session configured to expire objects
    # on commit would otherwise need a reload to serve img.id afterwards.
    target_ids = [str(img.id) for img in images]

    if explicit_rerun:
        # For explicit re-run, mark selected images as processing immediately
        # so frontend can reflect state transition without full page refresh.
        for img in images:
            img.ocr_status = OcrStatus.processing
        # commit() flushes pending changes itself; no separate flush is needed.
        await db.commit()

    from app.workers.ocr_tasks import process_image_ocr_async

    for target_id in target_ids:
        task = asyncio.create_task(process_image_ocr_async(target_id))
        _case_ocr_tasks.add(task)
        task.add_done_callback(_case_ocr_tasks.discard)
    submitted = len(target_ids)

    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }
first commit 2026-03-11 16:28:04 +08:00			`from uuid import UUID`
fix ocr 2026-03-12 12:32:29 +08:00			`import asyncio`
first commit 2026-03-11 16:28:04 +08:00
fix ocr 2026-03-12 12:32:29 +08:00			`from fastapi import APIRouter, Depends, UploadFile, File, HTTPException`
first commit 2026-03-11 16:28:04 +08:00			`from fastapi.responses import FileResponse`
			`from sqlalchemy.ext.asyncio import AsyncSession`

			`from app.core.config import settings`
			`from app.core.database import get_db`
fix ocr 2026-03-12 12:32:29 +08:00			`from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus`
first commit 2026-03-11 16:28:04 +08:00			`from app.repositories.image_repo import ImageRepository`
			`from app.repositories.case_repo import CaseRepository`
fix ocr 2026-03-12 12:32:29 +08:00			`from app.schemas.image import ImageOut, ImageDetailOut, OcrFieldCorrection, CaseOcrStartIn`
first commit 2026-03-11 16:28:04 +08:00			`from app.utils.hash import sha256_file`
			`from app.utils.file_storage import save_upload`

			`router = APIRouter()`


			`@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)`
			`async def upload_images(`
			`case_id: UUID,`
			`files: list[UploadFile] = File(...),`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`case_repo = CaseRepository(db)`
			`case = await case_repo.get(case_id)`
			`if not case:`
			`raise HTTPException(404, "案件不存在")`

			`img_repo = ImageRepository(db)`
			`results: list[EvidenceImage] = []`

			`for f in files:`
			`data = await f.read()`
fix ocr 2026-03-12 12:32:29 +08:00			`raw_hash = sha256_file(data)`
			`# Scope hash by case to avoid cross-case unique conflicts while still deduplicating inside one case.`
			`scoped_hash = f"{raw_hash}:{case_id}"`
first commit 2026-03-11 16:28:04 +08:00
fix ocr 2026-03-12 12:32:29 +08:00			`existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])`
first commit 2026-03-11 16:28:04 +08:00			`if existing:`
			`results.append(existing)`
			`continue`

			`file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")`
			`image = EvidenceImage(`
			`case_id=case_id,`
			`file_path=file_path,`
			`thumb_path=thumb_path,`
fix ocr 2026-03-12 12:32:29 +08:00			`file_hash=scoped_hash,`
first commit 2026-03-11 16:28:04 +08:00			`file_size=len(data),`
			`)`
			`image = await img_repo.create(image)`
			`results.append(image)`

			`case.image_count = await img_repo.count_by_case(case_id)`
			`await db.flush()`

fix ocr 2026-03-12 12:32:29 +08:00			`# trigger OCR tasks in-process background (non-blocking for API response)`
			`from app.workers.ocr_tasks import process_image_ocr_async`
first commit 2026-03-11 16:28:04 +08:00			`for img in results:`
			`if img.ocr_status.value == "pending":`
fix ocr 2026-03-12 12:32:29 +08:00			`asyncio.create_task(process_image_ocr_async(str(img.id)))`
first commit 2026-03-11 16:28:04 +08:00
			`return results`


			`@router.get("/cases/{case_id}/images", response_model=list[ImageOut])`
			`async def list_images(`
			`case_id: UUID,`
			`source_app: SourceApp \| None = None,`
			`page_type: PageType \| None = None,`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`repo = ImageRepository(db)`
fix ocr 2026-03-12 12:32:29 +08:00			`images = await repo.list_by_case(case_id, source_app=source_app, page_type=page_type)`
			`return [`
			`ImageOut(`
			`id=img.id,`
			`case_id=img.case_id,`
			`url=f"/api/v1/images/{img.id}/file",`
			`thumb_url=f"/api/v1/images/{img.id}/file",`
			`source_app=img.source_app,`
			`page_type=img.page_type,`
			`ocr_status=img.ocr_status,`
			`file_hash=img.file_hash,`
			`uploaded_at=img.uploaded_at,`
			`)`
			`for img in images`
			`]`
first commit 2026-03-11 16:28:04 +08:00

			`@router.get("/images/{image_id}", response_model=ImageDetailOut)`
			`async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`return ImageDetailOut(`
			`id=image.id,`
			`case_id=image.case_id,`
			`url=f"/api/v1/images/{image.id}/file",`
			`thumb_url=f"/api/v1/images/{image.id}/file",`
			`source_app=image.source_app,`
			`page_type=image.page_type,`
			`ocr_status=image.ocr_status,`
			`file_hash=image.file_hash,`
			`uploaded_at=image.uploaded_at,`
			`ocr_blocks=[`
			`{`
			`"id": b.id,`
			`"content": b.content,`
			`"bbox": b.bbox,`
			`"seq_order": b.seq_order,`
			`"confidence": b.confidence,`
			`}`
			`for b in image.ocr_blocks`
			`],`
			`)`


			`@router.patch("/images/{image_id}/ocr")`
			`async def correct_ocr(`
			`image_id: UUID,`
			`corrections: list[OcrFieldCorrection],`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`return {"message": "修正已保存", "corrections": len(corrections)}`


			`@router.get("/images/{image_id}/file")`
			`async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`full_path = settings.upload_path / image.file_path`
			`if not full_path.exists():`
			`raise HTTPException(404, "文件不存在")`
			`return FileResponse(full_path)`
fix ocr 2026-03-12 12:32:29 +08:00

			`@router.post("/cases/{case_id}/ocr/start")`
			`async def start_case_ocr(`
			`case_id: UUID,`
			`payload: CaseOcrStartIn \| None = None,`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`case_repo = CaseRepository(db)`
			`case = await case_repo.get(case_id)`
			`if not case:`
			`raise HTTPException(404, "案件不存在")`

			`repo = ImageRepository(db)`
			`include_done = payload.include_done if payload else False`
			`image_ids = payload.image_ids if payload else []`
			`if image_ids:`
			`images = await repo.list_by_ids_in_case(case_id, image_ids)`
			`# For explicit re-run, mark selected images as processing immediately`
			`# so frontend can reflect state transition without full page refresh.`
			`for img in images:`
			`img.ocr_status = OcrStatus.processing`
			`await db.flush()`
			`await db.commit()`
			`else:`
			`images = await repo.list_for_ocr(case_id, include_done=include_done)`

			`from app.workers.ocr_tasks import process_image_ocr_async`

			`submitted = 0`
			`for img in images:`
			`asyncio.create_task(process_image_ocr_async(str(img.id)))`
			`submitted += 1`

			`return {`
			`"caseId": str(case_id),`
			`"submitted": submitted,`
			`"totalCandidates": len(images),`
			`"message": f"已提交 {submitted} 张截图的 OCR 任务",`
			`}`