first commit

This commit is contained in:
2026-03-09 14:46:56 +08:00
commit 62236eb80e
63 changed files with 6143 additions and 0 deletions

9
backend/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# Backend image, built on the slim Python 3.11 base.
FROM python:3.11-slim
WORKDIR /app
# Copy the dependency manifest on its own first, then install from it.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the build context into /app.
COPY . .
# Put /app on the import path so the `app.main` module resolves.
ENV PYTHONPATH=/app
EXPOSE 8000
# Serve `app.main:app` with uvicorn on all interfaces, port 8000.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

1
backend/app/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Fund Tracer backend application."""

View File

@@ -0,0 +1 @@
# API routes

View File

@@ -0,0 +1,41 @@
"""Analysis API: get flow graph and summary for a case."""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.database import get_db
from app.models import Case, Transaction
from app.schemas import TransactionResponse, AnalysisSummaryResponse, FlowGraphResponse
from app.services.analyzer import build_flow_graph
router = APIRouter()
@router.get("/{case_id}/transactions", response_model=list[TransactionResponse])
async def list_transactions(case_id: int, db: AsyncSession = Depends(get_db)):
    """Return all transactions of a case, ordered by transaction time, then id.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    case_lookup = await db.execute(select(Case).where(Case.id == case_id))
    if case_lookup.scalar_one_or_none() is None:
        raise HTTPException(status_code=404, detail="Case not found")

    ordered_query = (
        select(Transaction)
        .where(Transaction.case_id == case_id)
        .order_by(Transaction.transaction_time, Transaction.id)
    )
    rows = (await db.execute(ordered_query)).scalars().all()
    return [TransactionResponse.model_validate(row) for row in rows]
@router.get("/{case_id}/analysis")
async def get_analysis(case_id: int, db: AsyncSession = Depends(get_db)):
    """Build the flow graph and summary for a case from its stored transactions.

    Returns:
        A dict with ``"summary"`` and ``"graph"`` keys holding the dumped models.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    found = (await db.execute(select(Case).where(Case.id == case_id))).scalar_one_or_none()
    if found is None:
        raise HTTPException(status_code=404, detail="Case not found")

    result = await db.execute(select(Transaction).where(Transaction.case_id == case_id))
    validated = [TransactionResponse.model_validate(row) for row in result.scalars().all()]
    graph, summary = build_flow_graph(validated)
    return {"summary": summary.model_dump(), "graph": graph.model_dump()}
@router.post("/{case_id}/analysis")
async def run_analysis(case_id: int, db: AsyncSession = Depends(get_db)):
    """POST variant of the analysis endpoint; delegates to ``get_analysis``."""
    return await get_analysis(case_id, db)

72
backend/app/api/cases.py Normal file
View File

@@ -0,0 +1,72 @@
"""Case CRUD API."""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.database import get_db
from app.models import Case
from app.schemas import CaseCreate, CaseUpdate, CaseResponse, CaseListResponse
router = APIRouter()
@router.get("", response_model=CaseListResponse)
async def list_cases(db: AsyncSession = Depends(get_db)):
    """List all cases, newest first (descending ``created_at``)."""
    newest_first = select(Case).order_by(Case.created_at.desc())
    rows = (await db.execute(newest_first)).scalars().all()
    items = [CaseResponse.model_validate(row) for row in rows]
    return CaseListResponse(items=items)
@router.post("", response_model=CaseResponse)
async def create_case(body: CaseCreate, db: AsyncSession = Depends(get_db)):
    """Create a new case from the request body.

    Raises:
        HTTPException: 409 when the commit violates a database constraint
            (``case_number`` is declared unique on the ``Case`` model).
    """
    # Function-level import so this handler stands on its own.
    from sqlalchemy.exc import IntegrityError

    case = Case(
        case_number=body.case_number,
        victim_name=body.victim_name,
        description=body.description or "",
    )
    db.add(case)
    try:
        await db.commit()
    except IntegrityError as exc:
        # Without this a duplicate case_number surfaces as an unhandled 500.
        await db.rollback()
        raise HTTPException(status_code=409, detail="Case number already exists") from exc
    await db.refresh(case)
    return CaseResponse.model_validate(case)
@router.get("/{case_id}", response_model=CaseResponse)
async def get_case(case_id: int, db: AsyncSession = Depends(get_db)):
    """Fetch a single case by id.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    lookup = await db.execute(select(Case).where(Case.id == case_id))
    record = lookup.scalar_one_or_none()
    if record:
        return CaseResponse.model_validate(record)
    raise HTTPException(status_code=404, detail="Case not found")
@router.put("/{case_id}", response_model=CaseResponse)
async def update_case(case_id: int, body: CaseUpdate, db: AsyncSession = Depends(get_db)):
    """Partially update a case: only body fields that are not ``None`` are applied.

    Raises:
        HTTPException: 404 when the case does not exist; 409 when the commit
            violates a database constraint (``case_number`` is unique).
    """
    # Function-level import so this handler stands on its own.
    from sqlalchemy.exc import IntegrityError

    r = await db.execute(select(Case).where(Case.id == case_id))
    case = r.scalar_one_or_none()
    if not case:
        raise HTTPException(status_code=404, detail="Case not found")

    # Same "is not None" rule as before, expressed once instead of five times.
    for field in ("case_number", "victim_name", "description", "total_loss", "status"):
        value = getattr(body, field)
        if value is not None:
            setattr(case, field, value)

    try:
        await db.commit()
    except IntegrityError as exc:
        # Renaming to an existing case_number would otherwise return a 500.
        await db.rollback()
        raise HTTPException(status_code=409, detail="Case number already exists") from exc
    await db.refresh(case)
    return CaseResponse.model_validate(case)
@router.delete("/{case_id}")
async def delete_case(case_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a case by id and confirm with ``{"ok": True}``.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    target = (await db.execute(select(Case).where(Case.id == case_id))).scalar_one_or_none()
    if not target:
        raise HTTPException(status_code=404, detail="Case not found")

    await db.delete(target)
    await db.commit()
    return {"ok": True}

47
backend/app/api/export.py Normal file
View File

@@ -0,0 +1,47 @@
"""Export API: Excel and PDF report download."""
from io import BytesIO
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.database import get_db
from app.models import Case, Transaction
from app.schemas import TransactionResponse
router = APIRouter()
@router.get("/{case_id}/export/excel")
async def export_excel(case_id: int, db: AsyncSession = Depends(get_db)):
    """Download the case report as an ``.xlsx`` attachment.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    # The report builder is imported at call time rather than at module import.
    from app.services.report import build_excel_report

    r = await db.execute(select(Case).where(Case.id == case_id))
    case = r.scalar_one_or_none()
    if not case:
        raise HTTPException(status_code=404, detail="Case not found")

    # Transaction.id breaks ties so rows with equal or NULL times keep a stable
    # order between exports (same ordering as the transactions listing).
    r = await db.execute(
        select(Transaction)
        .where(Transaction.case_id == case_id)
        .order_by(Transaction.transaction_time, Transaction.id)
    )
    txns = [TransactionResponse.model_validate(t) for t in r.scalars().all()]
    data = await build_excel_report(case, txns)
    return StreamingResponse(
        BytesIO(data),
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers={"Content-Disposition": f"attachment; filename=case_{case_id}_report.xlsx"},
    )
@router.get("/{case_id}/export/pdf")
async def export_pdf(case_id: int, db: AsyncSession = Depends(get_db)):
    """Download the case report as a PDF attachment.

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    # The report builder is imported at call time rather than at module import.
    from app.services.report import build_pdf_report

    r = await db.execute(select(Case).where(Case.id == case_id))
    case = r.scalar_one_or_none()
    if not case:
        raise HTTPException(status_code=404, detail="Case not found")

    # Transaction.id breaks ties so rows with equal or NULL times keep a stable
    # order between exports (same ordering as the transactions listing).
    r = await db.execute(
        select(Transaction)
        .where(Transaction.case_id == case_id)
        .order_by(Transaction.transaction_time, Transaction.id)
    )
    txns = [TransactionResponse.model_validate(t) for t in r.scalars().all()]
    data = await build_pdf_report(case, txns)
    return StreamingResponse(
        BytesIO(data),
        media_type="application/pdf",
        headers={"Content-Disposition": f"attachment; filename=case_{case_id}_report.pdf"},
    )

View File

@@ -0,0 +1,101 @@
"""Screenshot upload and extraction API."""
import uuid
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import get_settings
from app.models.database import get_db
from app.models import Case, Screenshot, Transaction
from app.schemas import ScreenshotResponse, ScreenshotListResponse, TransactionListResponse
from app.services.extractor import extract_and_save
router = APIRouter()
def _allowed(filename: str) -> bool:
    """Return True when the file extension (lower-cased, without the dot) is in the configured allow-list."""
    extension = Path(filename).suffix.lstrip(".").lower()
    permitted = get_settings().allowed_extensions
    return extension in permitted
@router.get("/{case_id}/screenshots", response_model=ScreenshotListResponse)
async def list_screenshots(case_id: int, db: AsyncSession = Depends(get_db)):
    """List the screenshots of a case, oldest first (by ``created_at``).

    Raises:
        HTTPException: 404 when no case with ``case_id`` exists.
    """
    case_lookup = await db.execute(select(Case).where(Case.id == case_id))
    if case_lookup.scalar_one_or_none() is None:
        raise HTTPException(status_code=404, detail="Case not found")

    by_creation = (
        select(Screenshot)
        .where(Screenshot.case_id == case_id)
        .order_by(Screenshot.created_at)
    )
    rows = (await db.execute(by_creation)).scalars().all()
    items = [ScreenshotResponse.model_validate(row) for row in rows]
    return ScreenshotListResponse(items=items)
@router.post("/{case_id}/screenshots", response_model=ScreenshotListResponse)
async def upload_screenshots(
    case_id: int,
    files: list[UploadFile] = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Store uploaded screenshots for a case and register them with status ``pending``.

    Files without a name or with a disallowed extension are skipped.
    Each stored file gets a random 12-hex-character name under
    ``<upload_dir>/<case_id>/``; the original name is kept in the DB row.

    Raises:
        HTTPException: 404 when the case does not exist; 413 when a file is
            larger than ``max_upload_size_mb``.
    """
    r = await db.execute(select(Case).where(Case.id == case_id))
    case = r.scalar_one_or_none()
    if not case:
        raise HTTPException(status_code=404, detail="Case not found")

    settings = get_settings()
    max_bytes = settings.max_upload_size_mb * 1024 * 1024
    upload_dir = settings.upload_dir.resolve()
    case_dir = upload_dir / str(case_id)
    case_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    created: list[Screenshot] = []
    try:
        for f in files:
            if not f.filename or not _allowed(f.filename):
                continue
            content = await f.read()
            # Enforce the configured size limit, which was previously unused.
            if len(content) > max_bytes:
                raise HTTPException(
                    status_code=413,
                    detail=f"File too large (max {settings.max_upload_size_mb} MB): {f.filename}",
                )
            path = case_dir / f"{uuid.uuid4().hex[:12]}{Path(f.filename).suffix}"
            path.write_bytes(content)
            written.append(path)
            screenshot = Screenshot(
                case_id=case_id,
                filename=f.filename,
                file_path=str(path.relative_to(upload_dir)),
                status="pending",
            )
            db.add(screenshot)
            created.append(screenshot)
        await db.commit()
    except Exception:
        # Nothing was committed, so remove the files written so far and
        # let the original error propagate.
        for leftover in written:
            leftover.unlink(missing_ok=True)
        raise

    for s in created:
        await db.refresh(s)
    return ScreenshotListResponse(items=[ScreenshotResponse.model_validate(s) for s in created])
@router.post("/{case_id}/screenshots/{screenshot_id}/extract", response_model=TransactionListResponse)
async def extract_transactions(
    case_id: int,
    screenshot_id: int,
    db: AsyncSession = Depends(get_db),
):
    """Run extraction on a stored screenshot and return the created transactions.

    The screenshot status is set to ``extracted`` on success and ``failed``
    on any extraction error.

    Raises:
        HTTPException: 404 when the screenshot row or its file is missing;
            502 when extraction fails (the original error is chained).
    """
    r = await db.execute(select(Screenshot).where(Screenshot.id == screenshot_id, Screenshot.case_id == case_id))
    screenshot = r.scalar_one_or_none()
    if not screenshot:
        raise HTTPException(status_code=404, detail="Screenshot not found")

    settings = get_settings()
    full_path = settings.upload_dir.resolve() / screenshot.file_path
    if not full_path.exists():
        raise HTTPException(status_code=404, detail="File not found on disk")
    image_bytes = full_path.read_bytes()

    # NOTE(review): running extraction twice on the same screenshot appends a
    # second set of rows; confirm whether earlier rows should be replaced.
    try:
        transactions = await extract_and_save(case_id, screenshot_id, image_bytes)
    except Exception as e:
        # The row is already loaded in this session, so update it directly
        # instead of selecting it again.
        screenshot.status = "failed"
        await db.commit()
        raise HTTPException(status_code=502, detail=f"Extraction failed: {e!s}") from e

    screenshot.status = "extracted"
    await db.commit()
    return TransactionListResponse(items=transactions)

View File

@@ -0,0 +1,30 @@
"""Runtime settings API for LLM provider and API keys."""
from pydantic import BaseModel
from fastapi import APIRouter
from app.config import public_settings, update_runtime_settings
router = APIRouter()
class SettingsUpdate(BaseModel):
    """Request body for updating runtime LLM settings; every field is optional."""

    # One of: openai | anthropic | deepseek | custom_openai (not validated here).
    llm_provider: str | None = None
    openai_api_key: str | None = None
    anthropic_api_key: str | None = None
    deepseek_api_key: str | None = None
    custom_openai_api_key: str | None = None
    custom_openai_base_url: str | None = None
    custom_openai_model: str | None = None
@router.get("")
async def get_runtime_settings():
    """Return the public view of the current settings (see ``public_settings``)."""
    return public_settings()
@router.put("")
async def update_settings(body: SettingsUpdate):
    """Apply the fields explicitly present in the request as runtime overrides.

    Returns the public settings view after the update.
    """
    # exclude_unset keeps only the fields the client actually sent.
    update_runtime_settings(body.model_dump(exclude_unset=True))
    return public_settings()

94
backend/app/config.py Normal file
View File

@@ -0,0 +1,94 @@
"""Application configuration from environment + runtime overrides."""
from functools import lru_cache
from pathlib import Path
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """App settings loaded from env."""

    # pydantic v2 configuration: read `.env` as UTF-8 and ignore unknown
    # variables. Same options the inner `class Config` carried.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    app_name: str = "Fund Tracer API"
    debug: bool = False

    # Database
    database_url: str = "sqlite+aiosqlite:///./fund_tracer.db"

    # Uploads
    upload_dir: Path = Path("./uploads")
    max_upload_size_mb: int = 20
    allowed_extensions: set[str] = {"png", "jpg", "jpeg", "webp"}

    # LLM
    llm_provider: str = "openai"  # openai | anthropic | deepseek | custom_openai
    openai_api_key: str | None = None
    anthropic_api_key: str | None = None
    deepseek_api_key: str | None = None
    custom_openai_api_key: str | None = None
    custom_openai_base_url: str | None = None
    openai_model: str = "gpt-4o"
    anthropic_model: str = "claude-3-5-sonnet-20241022"
    deepseek_model: str = "deepseek-chat"
    custom_openai_model: str = "gpt-4o-mini"
_runtime_overrides: dict[str, str | None] = {}
def _apply_overrides(settings: Settings) -> Settings:
    """Copy runtime overrides onto ``settings`` in place and return it.

    Only keys that are already attributes of ``settings`` are applied.
    """
    applicable = {
        name: override
        for name, override in _runtime_overrides.items()
        if hasattr(settings, name)
    }
    for name, override in applicable.items():
        setattr(settings, name, override)
    return settings
@lru_cache
def get_settings() -> Settings:
    """Return the cached ``Settings`` instance with runtime overrides applied.

    The result is memoized by ``lru_cache``; ``update_runtime_settings``
    clears the cache so new overrides take effect.
    """
    return _apply_overrides(Settings())
def update_runtime_settings(payload: dict[str, str | None]) -> Settings:
    """Update runtime settings and refresh cached Settings object.

    Keys outside the allow-list are ignored. For the fields typed as plain
    ``str`` on ``Settings`` (``llm_provider``, ``custom_openai_model``) a
    ``None`` value removes the override, so the env/default value applies
    again instead of the field becoming ``None``.
    """
    allowed = {
        "llm_provider",
        "openai_api_key",
        "anthropic_api_key",
        "deepseek_api_key",
        "custom_openai_api_key",
        "custom_openai_base_url",
        "custom_openai_model",
    }
    # Overrides are applied with setattr, so a None here would end up on the
    # Settings object as-is.
    non_nullable = {"llm_provider", "custom_openai_model"}
    for key, value in payload.items():
        if key not in allowed:
            continue
        if value is None and key in non_nullable:
            _runtime_overrides.pop(key, None)
        else:
            _runtime_overrides[key] = value
    get_settings.cache_clear()
    return get_settings()
def public_settings() -> dict:
    """Return the settings view that is safe to expose.

    API keys are reported only as booleans under ``has_keys``, never as values.
    """
    current = get_settings()
    models = {
        "openai": current.openai_model,
        "anthropic": current.anthropic_model,
        "deepseek": current.deepseek_model,
        "custom_openai": current.custom_openai_model,
    }
    base_urls = {"custom_openai": current.custom_openai_base_url or ""}
    has_keys = {
        "openai": bool(current.openai_api_key),
        "anthropic": bool(current.anthropic_api_key),
        "deepseek": bool(current.deepseek_api_key),
        "custom_openai": bool(current.custom_openai_api_key),
    }
    return {
        "llm_provider": current.llm_provider,
        "providers": ["openai", "anthropic", "deepseek", "custom_openai"],
        "models": models,
        "base_urls": base_urls,
        "has_keys": has_keys,
    }

52
backend/app/main.py Normal file
View File

@@ -0,0 +1,52 @@
"""FastAPI application entry point."""
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.config import get_settings
from app.models.database import init_db
from app.api import cases, screenshots, analysis, export, settings
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: prepare storage and DB on startup, release the engine on shutdown."""
    # init_db() stores the engine as a module global, so read it through the
    # module at shutdown time rather than importing the name by value.
    from app.models import database

    cfg = get_settings()
    cfg.upload_dir.mkdir(parents=True, exist_ok=True)
    await init_db()
    try:
        yield
    finally:
        # Close pooled connections instead of leaving them to interpreter exit.
        if database.engine is not None:
            await database.engine.dispose()
def create_app() -> FastAPI:
    """Build the FastAPI application: CORS for the local dev origins plus all API routers."""
    cfg = get_settings()
    application = FastAPI(title=cfg.app_name, lifespan=lifespan)

    dev_origins = [
        "http://localhost:3000",
        "http://localhost:5173",
        "http://127.0.0.1:3000",
        "http://127.0.0.1:5173",
    ]
    application.add_middleware(
        CORSMiddleware,
        allow_origins=dev_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Routers that share the /api/cases prefix, registered in this order.
    case_scoped = (
        (cases.router, "cases"),
        (screenshots.router, "screenshots"),
        (analysis.router, "analysis"),
        (export.router, "export"),
    )
    for sub_router, tag in case_scoped:
        application.include_router(sub_router, prefix="/api/cases", tags=[tag])
    application.include_router(settings.router, prefix="/api/settings", tags=["settings"])
    return application


app = create_app()
@app.get("/health")
def health():
    """Health check endpoint; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}

View File

@@ -0,0 +1,8 @@
"""SQLAlchemy models - export Base and all models for create_all."""
from app.models.database import Base, get_db, init_db, engine, async_session_maker
from app.models.case import Case
from app.models.screenshot import Screenshot
from app.models.transaction import Transaction
__all__ = ["Base", "Case", "Screenshot", "Transaction", "get_db", "init_db", "engine", "async_session_maker"]

View File

@@ -0,0 +1,28 @@
"""Case model - 案件."""
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from sqlalchemy import String, Text, DateTime, Numeric
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.models.database import Base
class Case(Base):
    """ORM model for the ``cases`` table: one case with its screenshots and transactions."""

    __tablename__ = "cases"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Unique, indexed case number.
    case_number: Mapped[str] = mapped_column(String(64), unique=True, index=True)
    victim_name: Mapped[str] = mapped_column(String(128))
    description: Mapped[str] = mapped_column(Text, default="")
    total_loss: Mapped[Decimal] = mapped_column(Numeric(18, 2), default=0)
    status: Mapped[str] = mapped_column(String(32), default="in_progress")  # in_progress | completed
    # Timestamps come from datetime.utcnow; updated_at is also set on update.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Children are removed together with the case (ORM cascade "all, delete-orphan").
    screenshots: Mapped[list["Screenshot"]] = relationship("Screenshot", back_populates="case", cascade="all, delete-orphan")
    transactions: Mapped[list["Transaction"]] = relationship("Transaction", back_populates="case", cascade="all, delete-orphan")

    def __repr__(self) -> str:
        return f"<Case(id={self.id}, case_number={self.case_number})>"

View File

@@ -0,0 +1,33 @@
"""Database session and initialization."""
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import DeclarativeBase
from app.config import get_settings
class Base(DeclarativeBase):
    """Declarative base for the ORM models; ``init_db`` creates tables from its metadata."""

    pass
engine = None
async_session_maker = None
async def init_db():
    """Create the async engine and session factory, then create all tables.

    Rebinds the module-level ``engine`` and ``async_session_maker`` globals.
    """
    global engine, async_session_maker

    cfg = get_settings()
    engine = create_async_engine(cfg.database_url, echo=cfg.debug)
    async_session_maker = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    # Import the models so their tables are present on Base.metadata.
    from app.models import Case, Screenshot, Transaction  # noqa: F401

    async with engine.begin() as connection:
        await connection.run_sync(Base.metadata.create_all)
async def get_db():
    """Yield one ``AsyncSession`` from the module-level session factory.

    Raises:
        RuntimeError: when called before ``init_db()`` has created the factory.
    """
    if async_session_maker is None:
        # Clearer than the "'NoneType' object is not callable" TypeError.
        raise RuntimeError("Database is not initialized; call init_db() first")
    async with async_session_maker() as session:
        yield session

View File

@@ -0,0 +1,27 @@
"""Screenshot model - 截图记录."""
from __future__ import annotations
from datetime import datetime
from sqlalchemy import String, DateTime, ForeignKey
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.models.database import Base
class Screenshot(Base):
    """ORM model for the ``screenshots`` table: one uploaded image belonging to a case."""

    __tablename__ = "screenshots"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id", ondelete="CASCADE"), index=True)
    # Original upload name.
    filename: Mapped[str] = mapped_column(String(255))
    # Stored file location (the upload handler saves it relative to the upload directory).
    file_path: Mapped[str] = mapped_column(String(512))
    status: Mapped[str] = mapped_column(String(32), default="pending")  # pending | extracted | failed
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    case: Mapped["Case"] = relationship("Case", back_populates="screenshots")
    # Transactions extracted from this screenshot are removed with it (ORM cascade).
    transactions: Mapped[list["Transaction"]] = relationship(
        "Transaction", back_populates="screenshot", cascade="all, delete-orphan"
    )

    def __repr__(self) -> str:
        return f"<Screenshot(id={self.id}, filename={self.filename})>"

View File

@@ -0,0 +1,35 @@
"""Transaction model - 交易记录."""
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from sqlalchemy import String, Text, DateTime, Numeric, ForeignKey
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.models.database import Base
class Transaction(Base):
    """ORM model for the ``transactions`` table: one transaction extracted from a screenshot."""

    __tablename__ = "transactions"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id", ondelete="CASCADE"), index=True)
    screenshot_id: Mapped[int] = mapped_column(ForeignKey("screenshots.id", ondelete="CASCADE"), index=True)
    app_source: Mapped[str] = mapped_column(String(128))
    transaction_type: Mapped[str] = mapped_column(String(32))  # transfer-out / transfer-in / purchase / receipt / withdrawal / top-up (the extraction prompt asks for Chinese labels)
    amount: Mapped[Decimal] = mapped_column(Numeric(18, 2))
    currency: Mapped[str] = mapped_column(String(16), default="CNY")
    counterparty_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
    counterparty_account: Mapped[str | None] = mapped_column(String(512), nullable=True)
    order_number: Mapped[str | None] = mapped_column(String(128), nullable=True)
    transaction_time: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
    remark: Mapped[str | None] = mapped_column(Text, nullable=True)
    raw_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    confidence: Mapped[str] = mapped_column(String(16), default="medium")  # high | medium | low
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    case: Mapped["Case"] = relationship("Case", back_populates="transactions")
    screenshot: Mapped["Screenshot"] = relationship("Screenshot", back_populates="transactions")

    def __repr__(self) -> str:
        return f"<Transaction(id={self.id}, amount={self.amount}, app={self.app_source})>"

View File

@@ -0,0 +1 @@
# Prompts

View File

@@ -0,0 +1,41 @@
"""Prompt for extracting transactions from billing screenshots."""
EXTRACT_TRANSACTION_SYSTEM = """你是一个专业的金融交易数据提取助手专门用于从手机APP账单或交易记录截图中提取结构化信息。"""
EXTRACT_TRANSACTION_USER = """请分析这张手机APP账单/交易记录截图,提取所有可见的交易记录。
要求:
1. 只返回一个JSON数组不要包含其他说明文字。
2. 数组的每个元素是一条交易,包含以下字段(若截图中无该信息则填 null
- app_source: stringAPP来源"微信支付""支付宝""XX银行""XX钱包"
- transaction_type: string交易类型"转出""转入""消费""收款""提现""充值"
- amount: number金额数字不含货币符号
- currency: string币种"CNY""USDT",默认 "CNY"
- counterparty_name: string | null对方名称/姓名
- counterparty_account: string | null对方账号、卡号尾号、钱包地址等
- order_number: string | null订单号/交易号
- transaction_time: string | null交易时间请用 ISO 8601 格式,如 "2024-01-15T14:30:00"
- remark: string | null备注/摘要
- confidence: string识别置信度"high""medium""low" 之一
3. 注意区分转入和转出方向;金额统一为正数,方向由 transaction_type 体现。
4. 若截图中没有交易记录或无法识别,返回空数组 []。
直接输出JSON数组不要用 markdown 代码块包裹。"""
def get_extract_messages(image_b64: str) -> list[dict]:
    """Build messages for vision API: system + user with image.

    The data-URL MIME type is detected from the image's leading bytes so that
    PNG, WebP and GIF images are labelled correctly; anything unrecognised
    (including undecodable input) is labelled ``image/jpeg``.

    :param image_b64: Base64-encoded image content, without a data-URL prefix.
    :return: Two messages: the system prompt, then a user message holding the
        text prompt and the image as a ``data:`` URL.
    """
    import base64
    import binascii

    try:
        # 16 base64 characters decode to the first 12 bytes, which is enough
        # for every signature checked below.
        head = base64.b64decode(image_b64[:16])
    except (binascii.Error, ValueError):
        head = b""

    if head.startswith(b"\x89PNG\r\n\x1a\n"):
        mime = "image/png"
    elif head[:4] == b"RIFF" and head[8:12] == b"WEBP":
        mime = "image/webp"
    elif head.startswith((b"GIF87a", b"GIF89a")):
        mime = "image/gif"
    else:
        mime = "image/jpeg"

    return [
        {"role": "system", "content": EXTRACT_TRANSACTION_SYSTEM},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": EXTRACT_TRANSACTION_USER},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime};base64,{image_b64}"},
                },
            ],
        },
    ]

View File

@@ -0,0 +1,37 @@
"""Pydantic schemas for API request/response."""
from app.schemas.case import (
CaseCreate,
CaseUpdate,
CaseResponse,
CaseListResponse,
)
from app.schemas.screenshot import (
ScreenshotResponse,
ScreenshotListResponse,
)
from app.schemas.transaction import (
TransactionCreate,
TransactionResponse,
TransactionListResponse,
TransactionExtractItem,
)
from app.schemas.analysis import (
AnalysisSummaryResponse,
FlowGraphResponse,
)
__all__ = [
"CaseCreate",
"CaseUpdate",
"CaseResponse",
"CaseListResponse",
"ScreenshotResponse",
"ScreenshotListResponse",
"TransactionCreate",
"TransactionResponse",
"TransactionListResponse",
"TransactionExtractItem",
"AnalysisSummaryResponse",
"FlowGraphResponse",
]

View File

@@ -0,0 +1,35 @@
"""Analysis response schemas."""
from decimal import Decimal
from pydantic import BaseModel
class AppSummary(BaseModel):
    """Inflow and outflow totals for a single app."""

    in_amount: Decimal
    out_amount: Decimal
class AnalysisSummaryResponse(BaseModel):
    """Totals for a case analysis."""

    total_out: Decimal
    total_in: Decimal
    # The analyzer fills this with total_out - total_in.
    net_loss: Decimal
    # Per-app totals, keyed by app name.
    by_app: dict[str, AppSummary]
    counterparty_count: int
class FlowNode(BaseModel):
    """A node of the flow graph."""

    id: str
    label: str
    type: str | None = None  # victim_app | counterparty
class FlowEdge(BaseModel):
    """A directed edge of the flow graph, referencing nodes by id."""

    source: str
    target: str
    # The analyzer aggregates amount and count per (source, target) pair.
    amount: Decimal
    count: int = 1
class FlowGraphResponse(BaseModel):
    """Flow graph as lists of nodes and edges."""

    nodes: list[FlowNode]
    edges: list[FlowEdge]

View File

@@ -0,0 +1,36 @@
"""Case schemas."""
from datetime import datetime
from decimal import Decimal
from pydantic import BaseModel, ConfigDict
class CaseBase(BaseModel):
    """Fields shared by the case create and response schemas."""

    case_number: str
    victim_name: str
    description: str = ""
class CaseCreate(CaseBase):
    """Request body for creating a case; adds nothing to ``CaseBase``."""

    pass
class CaseUpdate(BaseModel):
    """Request body for a partial case update; every field defaults to ``None``."""

    case_number: str | None = None
    victim_name: str | None = None
    description: str | None = None
    total_loss: Decimal | None = None
    status: str | None = None
class CaseResponse(CaseBase):
    """Case as returned by the API."""

    # from_attributes lets model_validate read values from an ORM object.
    model_config = ConfigDict(from_attributes=True)
    id: int
    total_loss: Decimal
    status: str
    created_at: datetime
    updated_at: datetime
class CaseListResponse(BaseModel):
    """Envelope for a list of cases."""

    items: list[CaseResponse]

View File

@@ -0,0 +1,18 @@
"""Screenshot schemas."""
from datetime import datetime
from pydantic import BaseModel, ConfigDict
class ScreenshotResponse(BaseModel):
    """Screenshot record as returned by the API."""

    # from_attributes lets model_validate read values from an ORM object.
    model_config = ConfigDict(from_attributes=True)
    id: int
    case_id: int
    filename: str
    file_path: str
    status: str
    created_at: datetime
class ScreenshotListResponse(BaseModel):
    """Envelope for a list of screenshots."""

    items: list[ScreenshotResponse]

View File

@@ -0,0 +1,51 @@
"""Transaction schemas."""
from datetime import datetime
from decimal import Decimal
from pydantic import BaseModel, ConfigDict
class TransactionBase(BaseModel):
    """Fields shared by the transaction create and response schemas."""

    app_source: str
    transaction_type: str
    amount: Decimal
    currency: str = "CNY"
    counterparty_name: str | None = None
    counterparty_account: str | None = None
    order_number: str | None = None
    transaction_time: datetime | None = None
    remark: str | None = None
    confidence: str = "medium"
class TransactionCreate(TransactionBase):
    """Transaction fields plus the owning case and screenshot ids."""

    case_id: int
    screenshot_id: int
    raw_text: str | None = None
class TransactionResponse(TransactionBase):
    """Transaction as returned by the API."""

    # from_attributes lets model_validate read values from an ORM object.
    model_config = ConfigDict(from_attributes=True)
    id: int
    case_id: int
    screenshot_id: int
    raw_text: str | None = None
    created_at: datetime
class TransactionListResponse(BaseModel):
    """Envelope for a list of transactions."""

    items: list[TransactionResponse]
class TransactionExtractItem(BaseModel):
    """Single item as returned by LLM extraction (before DB insert)."""

    # These fields currently mirror TransactionBase one-to-one.
    app_source: str
    transaction_type: str
    amount: Decimal
    currency: str = "CNY"
    counterparty_name: str | None = None
    counterparty_account: str | None = None
    order_number: str | None = None
    transaction_time: datetime | None = None
    remark: str | None = None
    confidence: str = "medium"

View File

@@ -0,0 +1 @@
# Services

View File

@@ -0,0 +1,107 @@
"""Fund flow analysis: build directed graph and summary from transactions."""
from collections import defaultdict
from decimal import Decimal
import networkx as nx
from app.schemas.analysis import (
AnalysisSummaryResponse,
AppSummary,
FlowGraphResponse,
FlowNode,
FlowEdge,
)
from app.schemas.transaction import TransactionResponse
# Transaction types that mean money leaving victim's app (outflow)
OUT_TYPES = {"转出", "消费", "付款", "提现"}
# Transaction types that mean money entering victim's app (inflow)
IN_TYPES = {"转入", "收款", "充值"}
def _is_out(t: TransactionResponse) -> bool:
    """Return True when the transaction type is an outflow.

    Matches an exact member of ``OUT_TYPES``, or any type text containing
    "转出" or "消费".
    """
    kind = t.transaction_type
    if kind in OUT_TYPES:
        return True
    text = kind or ""
    return "转出" in text or "消费" in text
def _is_in(t: TransactionResponse) -> bool:
    """Return True when the transaction type is an inflow.

    Matches an exact member of ``IN_TYPES``, or any type text containing
    "转入" or "收款".
    """
    kind = t.transaction_type
    if kind in IN_TYPES:
        return True
    text = kind or ""
    return "转入" in text or "收款" in text
def _node_id(app_or_counterparty: str, kind: str) -> str:
    """Return a stable node id of the form ``<kind>_<12 hex chars>``.

    The hex part is the start of the SHA-256 of ``"<kind>:<label>"``, where the
    label is the stripped input, or ``"unknown"`` when that is empty.
    """
    import hashlib

    label = (app_or_counterparty or "").strip()
    if not label:
        label = "unknown"
    digest = hashlib.sha256(f"{kind}:{label}".encode()).hexdigest()
    return f"{kind}_{digest[:12]}"
def build_flow_graph(transactions: list[TransactionResponse]) -> tuple[FlowGraphResponse, AnalysisSummaryResponse]:
    """
    Build directed graph and summary from transaction list.

    Nodes: one per app (``app_source``) and one per counterparty (name, else
    account, else a placeholder label).
    Edges: app -> counterparty for outflows and counterparty -> app for
    inflows, each aggregated to a total amount and a count.
    A transaction that is neither outflow nor inflow still registers its nodes
    and counterparty but adds no edge and no amount.

    :param transactions: Transactions of one case.
    :return: ``(graph, summary)``.
    """
    out_by_app: dict[str, Decimal] = defaultdict(Decimal)
    in_by_app: dict[str, Decimal] = defaultdict(Decimal)
    total_out = Decimal(0)
    total_in = Decimal(0)
    counterparties: set[str] = set()
    # (source_id, target_id) -> (amount, count)
    edges_agg: dict[tuple[str, str], tuple[Decimal, int]] = defaultdict(lambda: (Decimal(0), 0))
    node_labels: dict[str, str] = {}
    node_types: dict[str, str] = {}

    for t in transactions:
        amount = t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount))
        app = (t.app_source or "").strip() or "未知APP"
        # Strip each candidate before falling back, so a whitespace-only name
        # does not hide a usable account.
        counterparty = (
            (t.counterparty_name or "").strip()
            or (t.counterparty_account or "").strip()
            or "未知对方"
        )
        counterparties.add(counterparty)

        victim_node_id = _node_id(app, "victim_app")
        node_labels[victim_node_id] = app
        node_types[victim_node_id] = "victim_app"
        cp_node_id = _node_id(counterparty, "counterparty")
        node_labels[cp_node_id] = counterparty
        node_types[cp_node_id] = "counterparty"

        if _is_out(t):
            out_by_app[app] += amount
            total_out += amount
            key = (victim_node_id, cp_node_id)
            am, cnt = edges_agg[key]
            edges_agg[key] = (am + amount, cnt + 1)
        elif _is_in(t):
            in_by_app[app] += amount
            total_in += amount
            key = (cp_node_id, victim_node_id)
            am, cnt = edges_agg[key]
            edges_agg[key] = (am + amount, cnt + 1)

    all_apps = set(out_by_app.keys()) | set(in_by_app.keys())
    by_app = {
        app: AppSummary(
            in_amount=in_by_app.get(app, Decimal(0)),
            out_amount=out_by_app.get(app, Decimal(0)),
        )
        for app in all_apps
    }
    summary = AnalysisSummaryResponse(
        total_out=total_out,
        total_in=total_in,
        net_loss=total_out - total_in,
        by_app=by_app,
        counterparty_count=len(counterparties),
    )
    nodes = [
        FlowNode(id=nid, label=node_labels[nid], type=node_types.get(nid))
        for nid in node_labels
    ]
    edges = [
        FlowEdge(source=src, target=tgt, amount=am, count=cnt)
        for (src, tgt), (am, cnt) in edges_agg.items()
    ]
    graph = FlowGraphResponse(nodes=nodes, edges=edges)
    return graph, summary

View File

@@ -0,0 +1,42 @@
"""Transaction data extraction: LLM Vision + persistence."""
from app.models import Transaction
from app.models.database import async_session_maker
from app.schemas.transaction import TransactionExtractItem, TransactionResponse
from app.services.llm import get_llm_provider
async def extract_and_save(
    case_id: int,
    screenshot_id: int,
    image_bytes: bytes,
) -> list[TransactionResponse]:
    """
    Run vision extraction on image and persist transactions to DB.

    Returns list of created transactions; low-confidence items are still saved
    but flagged. A confidence value other than high/medium/low is stored as
    "medium".

    :raises RuntimeError: when the database has not been initialized yet.
    """
    # Look up the session factory through the module at call time. A
    # module-level `from app.models.database import async_session_maker`
    # copies the value as it is at import time, which is None until
    # init_db() has run.
    from app.models import database

    if database.async_session_maker is None:
        # Fail before the LLM call rather than after it.
        raise RuntimeError("Database is not initialized; call init_db() first")

    provider = get_llm_provider()
    items: list[TransactionExtractItem] = await provider.extract_from_image(image_bytes)

    rows = [
        Transaction(
            case_id=case_id,
            screenshot_id=screenshot_id,
            app_source=it.app_source,
            transaction_type=it.transaction_type,
            amount=it.amount,
            currency=it.currency or "CNY",
            counterparty_name=it.counterparty_name,
            counterparty_account=it.counterparty_account,
            order_number=it.order_number,
            transaction_time=it.transaction_time,
            remark=it.remark,
            confidence=it.confidence if it.confidence in ("high", "medium", "low") else "medium",
            raw_text=None,
        )
        for it in items
    ]

    async with database.async_session_maker() as session:
        session.add_all(rows)
        # A single flush inserts every row, so ids and column defaults are
        # available for validation below.
        await session.flush()
        results = [TransactionResponse.model_validate(row) for row in rows]
        await session.commit()
    return results

View File

@@ -0,0 +1,16 @@
# LLM providers
from app.services.llm.base import BaseLLMProvider
from app.services.llm.router import get_llm_provider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
__all__ = [
"BaseLLMProvider",
"get_llm_provider",
"OpenAIVisionProvider",
"ClaudeVisionProvider",
"DeepSeekVisionProvider",
"CustomOpenAICompatibleProvider",
]

View File

@@ -0,0 +1,18 @@
"""Base LLM provider - abstract interface for vision extraction."""
from abc import ABC, abstractmethod
from app.schemas.transaction import TransactionExtractItem
class BaseLLMProvider(ABC):
    """Abstract base for LLM vision providers. Each provider implements extract_from_image."""

    @abstractmethod
    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """
        Analyze a billing screenshot and return structured transaction list.

        :param image_bytes: Raw image file content (PNG/JPEG)
        :return: List of extracted transactions (may be empty or partial on failure)
        """
        pass

View File

@@ -0,0 +1,49 @@
"""Anthropic Claude Vision provider."""
import base64
import json
import re
from anthropic import AsyncAnthropic
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class ClaudeVisionProvider(BaseLLMProvider):
    """Vision provider backed by the Anthropic Messages API."""

    @staticmethod
    def _detect_media_type(image_bytes: bytes) -> str:
        """Return the image MIME type from its leading bytes, defaulting to ``image/jpeg``."""
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            return "image/webp"
        if image_bytes.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        return "image/jpeg"

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Send the screenshot to Claude and parse the reply as a transaction list.

        :param image_bytes: Raw image file content.
        :return: Items parsed from the model's text output.
        :raises ValueError: when no Anthropic API key is configured.
        """
        settings = get_settings()
        if not settings.anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY is not set")
        client = AsyncAnthropic(api_key=settings.anthropic_api_key)
        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        messages = get_extract_messages(b64)
        # Declare the real image format instead of always claiming JPEG.
        # NOTE(review): a mismatched media_type may be rejected by the API; confirm.
        media_type = self._detect_media_type(image_bytes)

        # Claude API: user message with content block list
        content_blocks = []
        for block in messages[1]["content"]:
            if block["type"] == "text":
                content_blocks.append({"type": "text", "text": block["text"]})
            elif block["type"] == "image_url":
                # Claude expects base64 without data URL prefix
                content_blocks.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": block["image_url"]["url"].split(",", 1)[-1],
                    },
                })

        response = await client.messages.create(
            model=settings.anthropic_model,
            max_tokens=4096,
            system=messages[0]["content"],
            messages=[{"role": "user", "content": content_blocks}],
        )
        # Concatenate the text of every response block that carries text.
        text = "".join(block.text for block in response.content if hasattr(block, "text"))
        return _parse_json_array(text or "[]")

View File

@@ -0,0 +1,32 @@
"""Custom OpenAI-compatible vision provider."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class CustomOpenAICompatibleProvider(BaseLLMProvider):
    """Vision provider for a user-configured OpenAI-compatible endpoint."""

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Extract transactions from a screenshot via the configured endpoint.

        :param image_bytes: Raw image file content.
        :return: Transactions parsed from the model's reply (possibly empty).
        :raises ValueError: If the custom API key or base URL is not configured.
        """
        cfg = get_settings()
        # Both the key and the endpoint are mandatory; the key is checked first.
        for configured, message in (
            (cfg.custom_openai_api_key, "CUSTOM_OPENAI_API_KEY is not set"),
            (cfg.custom_openai_base_url, "CUSTOM_OPENAI_BASE_URL is not set"),
        ):
            if not configured:
                raise ValueError(message)

        api = AsyncOpenAI(
            api_key=cfg.custom_openai_api_key,
            base_url=cfg.custom_openai_base_url,
        )
        encoded = base64.standard_b64encode(image_bytes).decode("ascii")
        completion = await api.chat.completions.create(
            model=cfg.custom_openai_model,
            messages=get_extract_messages(encoded),
            max_tokens=4096,
        )
        reply = completion.choices[0].message.content
        # An empty reply is treated as an empty JSON array.
        return _parse_json_array(reply if reply else "[]")

View File

@@ -0,0 +1,34 @@
"""DeepSeek Vision provider (uses OpenAI-compatible API)."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
# DeepSeek vision endpoint (OpenAI-compatible)
DEEPSEEK_BASE = "https://api.deepseek.com"
class DeepSeekVisionProvider(BaseLLMProvider):
    """Vision provider that calls DeepSeek through the OpenAI client."""

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Extract transactions from a screenshot using the DeepSeek endpoint.

        :param image_bytes: Raw image file content.
        :return: Transactions parsed from the model's reply (possibly empty).
        :raises ValueError: If ``deepseek_api_key`` is not configured.
        """
        conf = get_settings()
        key = conf.deepseek_api_key
        if not key:
            raise ValueError("DEEPSEEK_API_KEY is not set")

        # Same client class as the OpenAI provider, pointed at DEEPSEEK_BASE.
        deepseek = AsyncOpenAI(api_key=key, base_url=DEEPSEEK_BASE)
        image_b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        prompt = get_extract_messages(image_b64)
        result = await deepseek.chat.completions.create(
            model=conf.deepseek_model,
            messages=prompt,
            max_tokens=4096,
        )
        answer = result.choices[0].message.content
        if not answer:
            # An empty reply is treated as an empty JSON array.
            answer = "[]"
        return _parse_json_array(answer)

View File

@@ -0,0 +1,56 @@
"""OpenAI Vision provider (GPT-4o)."""
import base64
import json
import re
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
class OpenAIVisionProvider(BaseLLMProvider):
    """Vision provider backed by the OpenAI chat-completions API."""

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Extract transactions from a screenshot using an OpenAI model.

        :param image_bytes: Raw image file content.
        :return: Transactions parsed from the model's reply (possibly empty).
        :raises ValueError: If ``openai_api_key`` is not configured.
        """
        cfg = get_settings()
        api_key = cfg.openai_api_key
        if not api_key:
            raise ValueError("OPENAI_API_KEY is not set")

        openai_client = AsyncOpenAI(api_key=api_key)
        encoded_image = base64.standard_b64encode(image_bytes).decode("ascii")
        completion = await openai_client.chat.completions.create(
            model=cfg.openai_model,
            messages=get_extract_messages(encoded_image),
            max_tokens=4096,
        )
        reply = completion.choices[0].message.content
        # An empty reply is treated as an empty JSON array.
        return _parse_json_array(reply if reply else "[]")
def _parse_json_array(text: str) -> list[TransactionExtractItem]:
    """Parse an LLM reply into a list of TransactionExtractItem.

    Best-effort parser: the reply may wrap the JSON array in a markdown code
    fence or surround it with prose. The first JSON array in the text that
    contains at least one object is used. Elements that are not objects, or
    that fail validation, are skipped.

    :param text: Raw text returned by the model.
    :return: Validated items; empty when no usable array is found.
    """
    if not text:
        return []

    # Scan for the first '[' that starts a decodable JSON array. raw_decode
    # stops at the end of the value, so trailing text (closing fence,
    # commentary) does not make the parse fail.
    decoder = json.JSONDecoder()
    records: list = []
    start = text.find("[")
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Stray bracket in prose; try the next one.
            start = text.find("[", start + 1)
            continue
        if any(isinstance(entry, dict) for entry in candidate):
            records = candidate
            break
        # A valid array with no objects (e.g. "[1]" in prose): look past it.
        start = text.find("[", end)

    result: list[TransactionExtractItem] = []
    for entry in records:
        if not isinstance(entry, dict):
            continue
        item = dict(entry)  # normalize a copy, not the decoded payload
        try:
            raw_time = item.get("transaction_time")
            if isinstance(raw_time, str) and raw_time:
                # Convert ISO-8601 strings to datetime before validation.
                from dateutil import parser as date_parser
                item["transaction_time"] = date_parser.isoparse(raw_time)
            result.append(TransactionExtractItem.model_validate(item))
        except Exception:
            # Deliberately best-effort: one malformed row (bad timestamp,
            # schema violation) must not discard the remaining rows.
            continue
    return result

View File

@@ -0,0 +1,22 @@
"""LLM provider factory - returns provider by config."""
from app.config import get_settings
from app.services.llm.base import BaseLLMProvider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
def get_llm_provider() -> BaseLLMProvider:
    """Instantiate the provider selected by the ``llm_provider`` setting.

    The setting is compared case-insensitively. A missing or unrecognized
    value yields the OpenAI provider.
    """
    configured = get_settings().llm_provider or "openai"
    registry = {
        "openai": OpenAIVisionProvider,
        "anthropic": ClaudeVisionProvider,
        "deepseek": DeepSeekVisionProvider,
        "custom_openai": CustomOpenAICompatibleProvider,
    }
    provider_cls = registry.get(configured.lower(), OpenAIVisionProvider)
    return provider_cls()

View File

@@ -0,0 +1,125 @@
"""Report generation: Excel and PDF export."""
from io import BytesIO
from decimal import Decimal
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment
from openpyxl.utils import get_column_letter
# WeasyPrint optional for PDF
try:
from weasyprint import HTML, CSS
HAS_WEASYPRINT = True
except ImportError:
HAS_WEASYPRINT = False
async def build_excel_report(case, transactions: list) -> bytes:
    """Render the case as an Excel workbook and return the file content.

    The workbook has a summary sheet (case fields, transaction count,
    outgoing/incoming totals and their difference) and a detail sheet with
    one row per transaction.
    """

    def _as_decimal(value):
        # Amounts that are not already Decimal are converted via their string form.
        return value if isinstance(value, Decimal) else Decimal(str(value))

    def _total_for(exact_labels, fragments):
        # Sum the amounts whose type equals one of exact_labels or contains
        # one of fragments.
        total = 0
        for txn in transactions:
            label = txn.transaction_type
            if label in exact_labels or any(piece in (label or "") for piece in fragments):
                total += _as_decimal(txn.amount)
        return total

    workbook = Workbook()

    # Summary sheet.
    summary = workbook.active
    summary.title = "汇总"
    for line in (
        ["案件编号", case.case_number],
        ["受害人", case.victim_name],
        ["总损失", str(case.total_loss)],
        ["交易笔数", len(transactions)],
    ):
        summary.append(line)

    outgoing = _total_for(("转出", "消费", "付款", "提现"), ("转出", "消费"))
    incoming = _total_for(("转入", "收款", "充值"), ("转入", "收款"))
    summary.append(["转出合计", str(outgoing)])
    summary.append(["转入合计", str(incoming)])
    summary.append(["净损失", str(outgoing - incoming)])

    # Bold the label column of the seven summary rows.
    for row_idx in range(1, 8):
        summary.cell(row=row_idx, column=1).font = Font(bold=True)

    # Detail sheet.
    detail = workbook.create_sheet("交易明细")
    headers = ["APP来源", "类型", "金额", "币种", "对方名称", "对方账号", "订单号", "交易时间", "备注", "置信度"]
    detail.append(headers)
    for txn in transactions:
        when = txn.transaction_time.isoformat() if txn.transaction_time else ""
        detail.append([
            txn.app_source,
            txn.transaction_type or "",
            str(txn.amount),
            txn.currency or "CNY",
            txn.counterparty_name or "",
            txn.counterparty_account or "",
            txn.order_number or "",
            when,
            txn.remark or "",
            txn.confidence or "",
        ])

    for col_idx, _ in enumerate(headers, start=1):
        detail.cell(row=1, column=col_idx).font = Font(bold=True)
    for col_idx in range(1, detail.max_column + 1):
        detail.column_dimensions[get_column_letter(col_idx)].width = 16

    stream = BytesIO()
    workbook.save(stream)
    return stream.getvalue()
def _pdf_html(case, transactions: list) -> str:
rows = []
for t in transactions:
time_str = t.transaction_time.strftime("%Y-%m-%d %H:%M") if t.transaction_time else ""
rows.append(
f"<tr><td>{t.app_source}</td><td>{t.transaction_type or ''}</td><td>{t.amount}</td>"
f"<td>{t.counterparty_name or ''}</td><td>{t.counterparty_account or ''}</td><td>{time_str}</td></tr>"
)
table_rows = "\n".join(rows)
return f"""
<!DOCTYPE html>
<html>
<head><meta charset="utf-8"/><title>案件报告</title></head>
<body>
<h1>资金追踪报告</h1>
<p><strong>案件编号:</strong>{case.case_number}</p>
<p><strong>受害人:</strong>{case.victim_name}</p>
<p><strong>总损失:</strong>{case.total_loss}</p>
<p><strong>交易笔数:</strong>{len(transactions)}</p>
<h2>交易明细</h2>
<table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
<thead><tr><th>APP</th><th>类型</th><th>金额</th><th>对方名称</th><th>对方账号</th><th>时间</th></tr></thead>
<tbody>{table_rows}</tbody>
</table>
</body>
</html>
"""
async def build_pdf_report(case, transactions: list) -> bytes:
    """Render the case report as PDF and return the file content.

    When WeasyPrint could not be imported, a short placeholder byte string
    (a PDF header followed by a note) is returned instead of a rendered
    document.
    """
    if HAS_WEASYPRINT:
        output = BytesIO()
        HTML(string=_pdf_html(case, transactions)).write_pdf(output)
        return output.getvalue()
    return b"%PDF-1.4 (WeasyPrint not installed)"
async def build_excel_report_path(case, transactions: list, path: str) -> str:
    """Render the Excel report, save it at ``path``, and return that path."""
    workbook_bytes = await build_excel_report(case, transactions)
    with open(path, "wb") as out_file:
        out_file.write(workbook_bytes)
    return path
async def build_pdf_report_path(case, transactions: list, path: str) -> str:
    """Render the PDF report, save it at ``path``, and return that path."""
    pdf_bytes = await build_pdf_report(case, transactions)
    with open(path, "wb") as out_file:
        out_file.write(pdf_bytes)
    return path

29
backend/requirements.txt Normal file
View File

@@ -0,0 +1,29 @@
# FastAPI & server
fastapi==0.109.0
uvicorn[standard]==0.27.0
# Database
sqlalchemy==2.0.25
aiosqlite==0.19.0
greenlet
# Validation & config
pydantic==2.5.3
pydantic-settings==2.1.0
# LLM providers
openai==1.12.0
anthropic==0.18.1
httpx==0.26.0
# Analysis
networkx==3.2.1
# Export
openpyxl==3.1.2
weasyprint==60.2
jinja2==3.1.3
# Utils
python-multipart==0.0.6
python-dateutil==2.8.2

0
backend/uploads/.gitkeep Normal file
View File