# backend/app/api/v1/images.py

from uuid import UUID
import asyncio
from sqlalchemy import delete, select

from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.config import settings
from app.core.database import get_db
from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus
from app.repositories.image_repo import ImageRepository
from app.repositories.case_repo import CaseRepository
from app.schemas.image import (
    ImageOut,
    ImageDetailOut,
    OcrFieldCorrection,
    CaseOcrStartIn,
    CaseImagesDeleteIn,
)
from app.utils.hash import sha256_file
from app.utils.file_storage import save_upload

# Router collecting the evidence-image endpoints defined in this module
# (upload, listing, detail, file download, OCR start and deletion).
router = APIRouter()


# Strong references to OCR tasks started by upload_images. The event loop only
# keeps weak references to tasks, so a task whose handle is dropped can be
# garbage-collected before it finishes.
_UPLOAD_OCR_TASKS: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)
async def upload_images(
    case_id: UUID,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded screenshots for a case and schedule OCR for pending ones.

    Each file is hashed; a file whose raw or case-scoped hash is already known
    for this case is not stored again and the existing record is returned in
    its place. The case's ``image_count`` is refreshed afterwards.

    Args:
        case_id: Case the screenshots belong to.
        files: Uploaded files from the multipart request.
        db: Async database session.

    Returns:
        One ``EvidenceImage`` per uploaded file, in upload order (a duplicate
        upload yields the already stored record).

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    img_repo = ImageRepository(db)
    results: list[EvidenceImage] = []

    for f in files:
        data = await f.read()
        raw_hash = sha256_file(data)
        # Scope hash by case to avoid cross-case unique conflicts while still
        # deduplicating inside one case.
        scoped_hash = f"{raw_hash}:{case_id}"

        existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])
        if existing:
            results.append(existing)
            continue

        file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")
        image = EvidenceImage(
            case_id=case_id,
            file_path=file_path,
            thumb_path=thumb_path,
            file_hash=scoped_hash,
            file_size=len(data),
        )
        image = await img_repo.create(image)
        results.append(image)

    case.image_count = await img_repo.count_by_case(case_id)
    await db.flush()

    # trigger OCR tasks in-process background (non-blocking for API response)
    from app.workers.ocr_tasks import process_images_ocr_batch_async

    # The same record can occur more than once in ``results`` (the same file
    # uploaded twice in one request), so collect each pending image only once
    # to avoid submitting duplicate OCR work for it.
    pending_imgs: list[EvidenceImage] = []
    seen_ids = set()
    for img in results:
        if img.ocr_status.value != "pending" or img.id in seen_ids:
            continue
        seen_ids.add(img.id)
        pending_imgs.append(img)

    # Read the ids before committing: depending on session configuration the
    # ORM objects may be expired by the commit.
    pending_ids = [str(img.id) for img in pending_imgs]

    for img in pending_imgs:
        img.ocr_status = OcrStatus.processing
    if pending_imgs:
        await db.flush()
        await db.commit()

        task = asyncio.create_task(
            process_images_ocr_batch_async(
                pending_ids, settings.OCR_PARALLELISM
            )
        )
        # Keep the task alive until it completes, then drop the reference.
        _UPLOAD_OCR_TASKS.add(task)
        task.add_done_callback(_UPLOAD_OCR_TASKS.discard)

    return results


@router.get("/cases/{case_id}/images", response_model=list[ImageOut])
async def list_images(
    case_id: UUID,
    source_app: SourceApp | None = None,
    page_type: PageType | None = None,
    db: AsyncSession = Depends(get_db),
):
    """List the images of a case, optionally filtered by source app and page type.

    Both ``url`` and ``thumb_url`` of every entry point at the image's
    ``/file`` endpoint.
    """
    rows = await ImageRepository(db).list_by_case(
        case_id, source_app=source_app, page_type=page_type
    )

    payload: list[ImageOut] = []
    for row in rows:
        payload.append(
            ImageOut(
                id=row.id,
                case_id=row.case_id,
                url=f"/api/v1/images/{row.id}/file",
                thumb_url=f"/api/v1/images/{row.id}/file",
                source_app=row.source_app,
                page_type=row.page_type,
                ocr_status=row.ocr_status,
                file_hash=row.file_hash,
                uploaded_at=row.uploaded_at,
            )
        )
    return payload


@router.get("/images/{image_id}", response_model=ImageDetailOut)
async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return one image together with its OCR blocks.

    Raises:
        HTTPException: 404 when no image with ``image_id`` exists.
    """
    record = await ImageRepository(db).get(image_id)
    if not record:
        raise HTTPException(404, "截图不存在")

    # Flatten each OCR block into a plain dict for the response model.
    blocks = []
    for block in record.ocr_blocks:
        blocks.append(
            {
                "id": block.id,
                "content": block.content,
                "bbox": block.bbox,
                "seq_order": block.seq_order,
                "confidence": block.confidence,
            }
        )

    return ImageDetailOut(
        id=record.id,
        case_id=record.case_id,
        url=f"/api/v1/images/{record.id}/file",
        thumb_url=f"/api/v1/images/{record.id}/file",
        source_app=record.source_app,
        page_type=record.page_type,
        ocr_status=record.ocr_status,
        file_hash=record.file_hash,
        uploaded_at=record.uploaded_at,
        ocr_blocks=blocks,
    )


@router.patch("/images/{image_id}/ocr")
async def correct_ocr(
    image_id: UUID,
    corrections: list[OcrFieldCorrection],
    db: AsyncSession = Depends(get_db),
):
    """Acknowledge OCR field corrections for an existing image.

    This handler only verifies that the image exists and reports how many
    corrections were received; nothing here writes the corrections to the
    database.

    Raises:
        HTTPException: 404 when no image with ``image_id`` exists.
    """
    target = await ImageRepository(db).get(image_id)
    if not target:
        raise HTTPException(404, "截图不存在")

    received = len(corrections)
    return {"message": "修正已保存", "corrections": received}


@router.get("/images/{image_id}/file")
async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):
    """Serve the stored file of an image.

    Raises:
        HTTPException: 404 when the image record does not exist, or when its
            stored path is empty or does not point at a regular file.
    """
    repo = ImageRepository(db)
    image = await repo.get(image_id)
    if not image:
        raise HTTPException(404, "截图不存在")
    # A missing path cannot be joined onto the upload directory, and an empty
    # one would resolve to the upload directory itself.
    if not image.file_path:
        raise HTTPException(404, "文件不存在")
    full_path = settings.upload_path / image.file_path
    # is_file() rather than exists(): a directory exists but cannot be served,
    # so it is reported as a missing file instead of failing in FileResponse.
    if not full_path.is_file():
        raise HTTPException(404, "文件不存在")
    return FileResponse(full_path)


# Strong references to OCR tasks started by start_case_ocr. The event loop only
# keeps weak references to tasks, so a task whose handle is dropped can be
# garbage-collected before it finishes.
_CASE_OCR_TASKS: set[asyncio.Task] = set()


@router.post("/cases/{case_id}/ocr/start")
async def start_case_ocr(
    case_id: UUID,
    payload: CaseOcrStartIn | None = None,
    db: AsyncSession = Depends(get_db),
):
    """Queue OCR for images of a case.

    When ``payload.image_ids`` is given, those images are selected and any that
    are already in ``processing`` state are skipped. Otherwise the candidates
    come from ``ImageRepository.list_for_ocr`` (honouring ``include_done``).
    Selected images are marked ``processing`` and committed before the
    background OCR task is created.

    Args:
        case_id: Case whose images should be processed.
        payload: Optional selection (``image_ids``) and ``include_done`` flag.
        db: Async database session.

    Returns:
        A dict with ``caseId``, ``submitted``, ``totalCandidates`` and a
        human-readable ``message``.

    Raises:
        HTTPException: 404 when the case does not exist.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    repo = ImageRepository(db)
    include_done = payload.include_done if payload else False
    image_ids = payload.image_ids if payload else []
    if image_ids:
        images = await repo.list_by_ids_in_case(case_id, image_ids)
        # Never submit images that are already processing: this prevents
        # duplicate OCR tasks when users trigger OCR from multiple pages.
        images = [img for img in images if img.ocr_status != OcrStatus.processing]
    else:
        # NOTE(review): no "already processing" filter is applied here; whether
        # list_for_ocr excludes such images is not visible from this module.
        images = await repo.list_for_ocr(case_id, include_done=include_done)

    # Read the ids before committing: depending on session configuration the
    # ORM objects may be expired by the commit.
    image_ids_to_run = [str(img.id) for img in images]

    # Mark queued images as processing immediately (for explicit re-runs as
    # well as case-wide runs) so the UI can show progress right away.
    for img in images:
        img.ocr_status = OcrStatus.processing
    if images:
        await db.flush()
        await db.commit()

    from app.workers.ocr_tasks import process_images_ocr_batch_async

    submitted = len(image_ids_to_run)
    if image_ids_to_run:
        task = asyncio.create_task(
            process_images_ocr_batch_async(
                image_ids_to_run, settings.OCR_PARALLELISM
            )
        )
        # Keep the task alive until it completes, then drop the reference.
        _CASE_OCR_TASKS.add(task)
        task.add_done_callback(_CASE_OCR_TASKS.discard)

    return {
        "caseId": str(case_id),
        "submitted": submitted,
        "totalCandidates": len(images),
        "message": f"已提交 {submitted} 张截图的 OCR 任务",
    }


@router.delete("/cases/{case_id}/images")
async def delete_case_images(
    case_id: UUID,
    payload: CaseImagesDeleteIn,
    db: AsyncSession = Depends(get_db),
):
    """Delete selected images of a case together with their derived records.

    For the selected images this removes, in order: fraud assessments attached
    to the images' transaction records, OCR blocks, transaction records and
    finally the image rows. The case's ``image_count`` is then refreshed and
    the transaction committed. Stored files are removed from disk only after
    the commit succeeded, so a rolled-back deletion never loses files.

    Args:
        case_id: Case the images belong to.
        payload: Request body carrying ``image_ids``.
        db: Async database session.

    Returns:
        A dict with ``caseId``, ``deleted`` (number of images removed) and a
        human-readable ``message``.

    Raises:
        HTTPException: 404 when the case does not exist.
        Exception: Any database error is re-raised after rolling back.
    """
    case_repo = CaseRepository(db)
    case = await case_repo.get(case_id)
    if not case:
        raise HTTPException(404, "案件不存在")

    if not payload.image_ids:
        return {"caseId": str(case_id), "deleted": 0, "message": "未选择需要删除的截图"}

    repo = ImageRepository(db)
    images = await repo.list_by_ids_in_case(case_id, payload.image_ids)
    if not images:
        return {"caseId": str(case_id), "deleted": 0, "message": "未找到可删除的截图"}

    import logging

    from app.models.ocr_block import OcrBlock
    from app.models.transaction import TransactionRecord
    from app.models.assessment import FraudAssessment

    # Capture ids and file paths up front so nothing has to be read from the
    # ORM objects once they are deleted and the transaction is committed.
    image_pks = [image.id for image in images]
    stored_files = [
        rel
        for image in images
        for rel in (image.file_path, image.thumb_path)
        if rel
    ]

    try:
        # Assessments point at transaction records, so they must go before
        # the transaction records themselves.
        await db.execute(
            delete(FraudAssessment).where(
                FraudAssessment.transaction_id.in_(
                    select(TransactionRecord.id).where(
                        TransactionRecord.evidence_image_id.in_(image_pks)
                    )
                )
            )
        )
        await db.execute(delete(OcrBlock).where(OcrBlock.image_id.in_(image_pks)))
        await db.execute(
            delete(TransactionRecord).where(
                TransactionRecord.evidence_image_id.in_(image_pks)
            )
        )
        for image in images:
            await repo.delete(image)

        case.image_count = await repo.count_by_case(case_id)
        await db.flush()
        await db.commit()
    except Exception:
        await db.rollback()
        raise

    # Best-effort removal of local files, deliberately after the commit: if the
    # transaction had failed, the rows would still reference these files.
    log = logging.getLogger(__name__)
    for rel in stored_files:
        try:
            (settings.upload_path / rel).unlink(missing_ok=True)
        except (OSError, ValueError) as exc:
            log.warning("Failed to remove image file %s: %s", rel, exc)

    deleted = len(images)
    return {
        "caseId": str(case_id),
        "deleted": deleted,
        "message": f"已删除 {deleted} 张截图",
    }
first commit 2026-03-11 16:28:04 +08:00			`from uuid import UUID`
fix ocr 2026-03-12 12:32:29 +08:00			`import asyncio`
update: fix-02 2026-03-13 09:57:04 +08:00			`from sqlalchemy import delete, select`
first commit 2026-03-11 16:28:04 +08:00
fix ocr 2026-03-12 12:32:29 +08:00			`from fastapi import APIRouter, Depends, UploadFile, File, HTTPException`
first commit 2026-03-11 16:28:04 +08:00			`from fastapi.responses import FileResponse`
			`from sqlalchemy.ext.asyncio import AsyncSession`

			`from app.core.config import settings`
			`from app.core.database import get_db`
fix ocr 2026-03-12 12:32:29 +08:00			`from app.models.evidence_image import EvidenceImage, SourceApp, PageType, OcrStatus`
first commit 2026-03-11 16:28:04 +08:00			`from app.repositories.image_repo import ImageRepository`
			`from app.repositories.case_repo import CaseRepository`
update: fix-02 2026-03-13 09:57:04 +08:00			`from app.schemas.image import (`
			`ImageOut,`
			`ImageDetailOut,`
			`OcrFieldCorrection,`
			`CaseOcrStartIn,`
			`CaseImagesDeleteIn,`
			`)`
first commit 2026-03-11 16:28:04 +08:00			`from app.utils.hash import sha256_file`
			`from app.utils.file_storage import save_upload`

			`router = APIRouter()`


			`@router.post("/cases/{case_id}/images", response_model=list[ImageOut], status_code=201)`
			`async def upload_images(`
			`case_id: UUID,`
			`files: list[UploadFile] = File(...),`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`case_repo = CaseRepository(db)`
			`case = await case_repo.get(case_id)`
			`if not case:`
			`raise HTTPException(404, "案件不存在")`

			`img_repo = ImageRepository(db)`
			`results: list[EvidenceImage] = []`

			`for f in files:`
			`data = await f.read()`
fix ocr 2026-03-12 12:32:29 +08:00			`raw_hash = sha256_file(data)`
			`# Scope hash by case to avoid cross-case unique conflicts while still deduplicating inside one case.`
			`scoped_hash = f"{raw_hash}:{case_id}"`
first commit 2026-03-11 16:28:04 +08:00
fix ocr 2026-03-12 12:32:29 +08:00			`existing = await img_repo.find_by_hash_in_case(case_id, [raw_hash, scoped_hash])`
first commit 2026-03-11 16:28:04 +08:00			`if existing:`
			`results.append(existing)`
			`continue`

			`file_path, thumb_path = save_upload(data, str(case_id), f.filename or "upload.png")`
			`image = EvidenceImage(`
			`case_id=case_id,`
			`file_path=file_path,`
			`thumb_path=thumb_path,`
fix ocr 2026-03-12 12:32:29 +08:00			`file_hash=scoped_hash,`
first commit 2026-03-11 16:28:04 +08:00			`file_size=len(data),`
			`)`
			`image = await img_repo.create(image)`
			`results.append(image)`

			`case.image_count = await img_repo.count_by_case(case_id)`
			`await db.flush()`

fix ocr 2026-03-12 12:32:29 +08:00			`# trigger OCR tasks in-process background (non-blocking for API response)`
fix: bugs-01 2026-03-12 19:57:30 +08:00			`from app.workers.ocr_tasks import process_images_ocr_batch_async`
fix: mock 2026-03-13 23:29:55 +08:00			`pending_imgs = [img for img in results if img.ocr_status.value == "pending"]`
			`for img in pending_imgs:`
			`img.ocr_status = OcrStatus.processing`
			`if pending_imgs:`
			`await db.flush()`
			`await db.commit()`
			`pending_ids = [str(img.id) for img in pending_imgs]`
fix: bugs-01 2026-03-12 19:57:30 +08:00			`if pending_ids:`
			`asyncio.create_task(`
			`process_images_ocr_batch_async(`
			`pending_ids, settings.OCR_PARALLELISM`
			`)`
			`)`
first commit 2026-03-11 16:28:04 +08:00
			`return results`


			`@router.get("/cases/{case_id}/images", response_model=list[ImageOut])`
			`async def list_images(`
			`case_id: UUID,`
			`source_app: SourceApp \| None = None,`
			`page_type: PageType \| None = None,`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`repo = ImageRepository(db)`
fix ocr 2026-03-12 12:32:29 +08:00			`images = await repo.list_by_case(case_id, source_app=source_app, page_type=page_type)`
			`return [`
			`ImageOut(`
			`id=img.id,`
			`case_id=img.case_id,`
			`url=f"/api/v1/images/{img.id}/file",`
			`thumb_url=f"/api/v1/images/{img.id}/file",`
			`source_app=img.source_app,`
			`page_type=img.page_type,`
			`ocr_status=img.ocr_status,`
			`file_hash=img.file_hash,`
			`uploaded_at=img.uploaded_at,`
			`)`
			`for img in images`
			`]`
first commit 2026-03-11 16:28:04 +08:00

			`@router.get("/images/{image_id}", response_model=ImageDetailOut)`
			`async def get_image_detail(image_id: UUID, db: AsyncSession = Depends(get_db)):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`return ImageDetailOut(`
			`id=image.id,`
			`case_id=image.case_id,`
			`url=f"/api/v1/images/{image.id}/file",`
			`thumb_url=f"/api/v1/images/{image.id}/file",`
			`source_app=image.source_app,`
			`page_type=image.page_type,`
			`ocr_status=image.ocr_status,`
			`file_hash=image.file_hash,`
			`uploaded_at=image.uploaded_at,`
			`ocr_blocks=[`
			`{`
			`"id": b.id,`
			`"content": b.content,`
			`"bbox": b.bbox,`
			`"seq_order": b.seq_order,`
			`"confidence": b.confidence,`
			`}`
			`for b in image.ocr_blocks`
			`],`
			`)`


			`@router.patch("/images/{image_id}/ocr")`
			`async def correct_ocr(`
			`image_id: UUID,`
			`corrections: list[OcrFieldCorrection],`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`return {"message": "修正已保存", "corrections": len(corrections)}`


			`@router.get("/images/{image_id}/file")`
			`async def get_image_file(image_id: UUID, db: AsyncSession = Depends(get_db)):`
			`repo = ImageRepository(db)`
			`image = await repo.get(image_id)`
			`if not image:`
			`raise HTTPException(404, "截图不存在")`
			`full_path = settings.upload_path / image.file_path`
			`if not full_path.exists():`
			`raise HTTPException(404, "文件不存在")`
			`return FileResponse(full_path)`
fix ocr 2026-03-12 12:32:29 +08:00

			`@router.post("/cases/{case_id}/ocr/start")`
			`async def start_case_ocr(`
			`case_id: UUID,`
			`payload: CaseOcrStartIn \| None = None,`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`case_repo = CaseRepository(db)`
			`case = await case_repo.get(case_id)`
			`if not case:`
			`raise HTTPException(404, "案件不存在")`

			`repo = ImageRepository(db)`
			`include_done = payload.include_done if payload else False`
			`image_ids = payload.image_ids if payload else []`
			`if image_ids:`
			`images = await repo.list_by_ids_in_case(case_id, image_ids)`
fix: mock 2026-03-13 23:29:55 +08:00			`# Never submit images that are already processing: this prevents`
			`# duplicate OCR tasks when users trigger OCR from multiple pages.`
			`images = [img for img in images if img.ocr_status != OcrStatus.processing]`
fix ocr 2026-03-12 12:32:29 +08:00			`# For explicit re-run, mark selected images as processing immediately`
			`# so frontend can reflect state transition without full page refresh.`
			`for img in images:`
			`img.ocr_status = OcrStatus.processing`
fix: mock 2026-03-13 23:29:55 +08:00			`if images:`
			`await db.flush()`
			`await db.commit()`
fix ocr 2026-03-12 12:32:29 +08:00			`else:`
			`images = await repo.list_for_ocr(case_id, include_done=include_done)`
update: fix-02 2026-03-13 09:57:04 +08:00			`# Mark queued images as processing immediately, including when OCR is`
			`# triggered from workspace page, so UI can show progress right away.`
			`for img in images:`
			`img.ocr_status = OcrStatus.processing`
fix: mock 2026-03-13 23:29:55 +08:00			`if images:`
			`await db.flush()`
			`await db.commit()`
fix ocr 2026-03-12 12:32:29 +08:00
fix: bugs-01 2026-03-12 19:57:30 +08:00			`from app.workers.ocr_tasks import process_images_ocr_batch_async`
fix ocr 2026-03-12 12:32:29 +08:00
fix: bugs-01 2026-03-12 19:57:30 +08:00			`image_ids_to_run = [str(img.id) for img in images]`
			`submitted = len(image_ids_to_run)`
			`if image_ids_to_run:`
			`asyncio.create_task(`
			`process_images_ocr_batch_async(`
			`image_ids_to_run, settings.OCR_PARALLELISM`
			`)`
			`)`
fix ocr 2026-03-12 12:32:29 +08:00
			`return {`
			`"caseId": str(case_id),`
			`"submitted": submitted,`
			`"totalCandidates": len(images),`
			`"message": f"已提交 {submitted} 张截图的 OCR 任务",`
			`}`
update: fix-02 2026-03-13 09:57:04 +08:00

			`@router.delete("/cases/{case_id}/images")`
			`async def delete_case_images(`
			`case_id: UUID,`
			`payload: CaseImagesDeleteIn,`
			`db: AsyncSession = Depends(get_db),`
			`):`
			`case_repo = CaseRepository(db)`
			`case = await case_repo.get(case_id)`
			`if not case:`
			`raise HTTPException(404, "案件不存在")`

			`if not payload.image_ids:`
			`return {"caseId": str(case_id), "deleted": 0, "message": "未选择需要删除的截图"}`

			`repo = ImageRepository(db)`
			`images = await repo.list_by_ids_in_case(case_id, payload.image_ids)`
			`if not images:`
			`return {"caseId": str(case_id), "deleted": 0, "message": "未找到可删除的截图"}`

			`from app.models.ocr_block import OcrBlock`
			`from app.models.transaction import TransactionRecord`
			`from app.models.assessment import FraudAssessment`

			`deleted = 0`
			`try:`
			`for image in images:`
			`# remove related OCR blocks and extracted transactions first`
			`# assessments reference transaction_records.transaction_id, so they`
			`# must be deleted before deleting transaction records.`
			`await db.execute(`
			`delete(FraudAssessment).where(`
			`FraudAssessment.transaction_id.in_(`
			`select(TransactionRecord.id).where(`
			`TransactionRecord.evidence_image_id == image.id`
			`)`
			`)`
			`)`
			`)`
			`await db.execute(delete(OcrBlock).where(OcrBlock.image_id == image.id))`
			`await db.execute(delete(TransactionRecord).where(TransactionRecord.evidence_image_id == image.id))`
			`await repo.delete(image)`
			`deleted += 1`

			`# best-effort remove local files`
			`for rel in [image.file_path, image.thumb_path]:`
			`if rel:`
			`try:`
			`p = settings.upload_path / rel`
			`if p.exists():`
			`p.unlink()`
			`except Exception:`
			`pass`

			`case.image_count = await repo.count_by_case(case_id)`
			`await db.flush()`
			`await db.commit()`
			`except Exception as e:`
			`await db.rollback()`
			`raise`

			`return {`
			`"caseId": str(case_id),`
			`"deleted": deleted,`
			`"message": f"已删除 {deleted} 张截图",`
			`}`