first commit

2026-03-09 14:46:56 +08:00
commit 62236eb80e
63 changed files with 6143 additions and 0 deletions
--- a/backend/app/services/init.py
+++ b/backend/app/services/init.py
@@ -0,0 +1 @@
+# Services
--- a/backend/app/services/analyzer.py
+++ b/backend/app/services/analyzer.py
@@ -0,0 +1,107 @@
+"""Fund flow analysis: build directed graph and summary from transactions."""
+
+from collections import defaultdict
+from decimal import Decimal
+
+import networkx as nx
+
+from app.schemas.analysis import (
+    AnalysisSummaryResponse,
+    AppSummary,
+    FlowGraphResponse,
+    FlowNode,
+    FlowEdge,
+)
+from app.schemas.transaction import TransactionResponse
+
+# Transaction types that mean money leaving victim's app (outflow)
+OUT_TYPES = {"转出", "消费", "付款", "提现"}
+# Transaction types that mean money entering victim's app (inflow)
+IN_TYPES = {"转入", "收款", "充值"}
+
+
+def _is_out(t: TransactionResponse) -> bool:
+    return t.transaction_type in OUT_TYPES or "转出" in (t.transaction_type or "") or "消费" in (t.transaction_type or "")
+
+
+def _is_in(t: TransactionResponse) -> bool:
+    return t.transaction_type in IN_TYPES or "转入" in (t.transaction_type or "") or "收款" in (t.transaction_type or "")
+
+
+def _node_id(app_or_counterparty: str, kind: str) -> str:
+    """Generate stable node id; kind in ('victim_app', 'counterparty')."""
+    import hashlib
+    safe = (app_or_counterparty or "").strip() or "unknown"
+    h = hashlib.sha256(f"{kind}:{safe}".encode()).hexdigest()[:12]
+    return f"{kind}_{h}"
+
+
+def build_flow_graph(transactions: list[TransactionResponse]) -> tuple[FlowGraphResponse, AnalysisSummaryResponse]:
+    """
+    Build directed graph and summary from transaction list.
+    Node: victim's app (app_source when outflow) or counterparty (counterparty_name or counterparty_account).
+    Edge: source -> target with total amount and count.
+    """
+    out_by_app: dict[str, Decimal] = defaultdict(Decimal)
+    in_by_app: dict[str, Decimal] = defaultdict(Decimal)
+    total_out = Decimal(0)
+    total_in = Decimal(0)
+    counterparties: set[str] = set()
+    # (source_id, target_id) -> (amount, count)
+    edges_agg: dict[tuple[str, str], tuple[Decimal, int]] = defaultdict(lambda: (Decimal(0), 0))
+    node_labels: dict[str, str] = {}
+    node_types: dict[str, str] = {}
+
+    for t in transactions:
+        amount = t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount))
+        app = (t.app_source or "").strip() or "未知APP"
+        counterparty = (t.counterparty_name or t.counterparty_account or "未知对方").strip() or "未知对方"
+        counterparties.add(counterparty)
+
+        victim_node_id = _node_id(app, "victim_app")
+        node_labels[victim_node_id] = app
+        node_types[victim_node_id] = "victim_app"
+
+        cp_node_id = _node_id(counterparty, "counterparty")
+        node_labels[cp_node_id] = counterparty
+        node_types[cp_node_id] = "counterparty"
+
+        if _is_out(t):
+            out_by_app[app] += amount
+            total_out += amount
+            key = (victim_node_id, cp_node_id)
+            am, cnt = edges_agg[key]
+            edges_agg[key] = (am + amount, cnt + 1)
+        elif _is_in(t):
+            in_by_app[app] += amount
+            total_in += amount
+            key = (cp_node_id, victim_node_id)
+            am, cnt = edges_agg[key]
+            edges_agg[key] = (am + amount, cnt + 1)
+
+    all_apps = set(out_by_app.keys()) | set(in_by_app.keys())
+    by_app = {
+        app: AppSummary(
+            in_amount=in_by_app.get(app, Decimal(0)),
+            out_amount=out_by_app.get(app, Decimal(0)),
+        )
+        for app in all_apps
+    }
+    summary = AnalysisSummaryResponse(
+        total_out=total_out,
+        total_in=total_in,
+        net_loss=total_out - total_in,
+        by_app=by_app,
+        counterparty_count=len(counterparties),
+    )
+
+    nodes = [
+        FlowNode(id=nid, label=node_labels[nid], type=node_types.get(nid))
+        for nid in node_labels
+    ]
+    edges = [
+        FlowEdge(source=src, target=tgt, amount=am, count=cnt)
+        for (src, tgt), (am, cnt) in edges_agg.items()
+    ]
+    graph = FlowGraphResponse(nodes=nodes, edges=edges)
+    return graph, summary
--- a/backend/app/services/extractor.py
+++ b/backend/app/services/extractor.py
@@ -0,0 +1,42 @@
+"""Transaction data extraction: LLM Vision + persistence."""
+
+from app.models import Transaction
+from app.models.database import async_session_maker
+from app.schemas.transaction import TransactionExtractItem, TransactionResponse
+from app.services.llm import get_llm_provider
+
+
+async def extract_and_save(
+    case_id: int,
+    screenshot_id: int,
+    image_bytes: bytes,
+) -> list[TransactionResponse]:
+    """
+    Run vision extraction on image and persist transactions to DB.
+    Returns list of created transactions; low-confidence items are still saved but flagged.
+    """
+    provider = get_llm_provider()
+    items: list[TransactionExtractItem] = await provider.extract_from_image(image_bytes)
+    results: list[TransactionResponse] = []
+    async with async_session_maker() as session:
+        for it in items:
+            t = Transaction(
+                case_id=case_id,
+                screenshot_id=screenshot_id,
+                app_source=it.app_source,
+                transaction_type=it.transaction_type,
+                amount=it.amount,
+                currency=it.currency or "CNY",
+                counterparty_name=it.counterparty_name,
+                counterparty_account=it.counterparty_account,
+                order_number=it.order_number,
+                transaction_time=it.transaction_time,
+                remark=it.remark,
+                confidence=it.confidence if it.confidence in ("high", "medium", "low") else "medium",
+                raw_text=None,
+            )
+            session.add(t)
+            await session.flush()
+            results.append(TransactionResponse.model_validate(t))
+        await session.commit()
+    return results
--- a/backend/app/services/llm/init.py
+++ b/backend/app/services/llm/init.py
@@ -0,0 +1,16 @@
+# LLM providers
+from app.services.llm.base import BaseLLMProvider
+from app.services.llm.router import get_llm_provider
+from app.services.llm.openai_vision import OpenAIVisionProvider
+from app.services.llm.claude_vision import ClaudeVisionProvider
+from app.services.llm.deepseek_vision import DeepSeekVisionProvider
+from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
+
+__all__ = [
+    "BaseLLMProvider",
+    "get_llm_provider",
+    "OpenAIVisionProvider",
+    "ClaudeVisionProvider",
+    "DeepSeekVisionProvider",
+    "CustomOpenAICompatibleProvider",
+]
--- a/backend/app/services/llm/base.py
+++ b/backend/app/services/llm/base.py
@@ -0,0 +1,18 @@
+"""Base LLM provider - abstract interface for vision extraction."""
+
+from abc import ABC, abstractmethod
+
+from app.schemas.transaction import TransactionExtractItem
+
+
+class BaseLLMProvider(ABC):
+    """Abstract base for LLM vision providers. Each provider implements extract_from_image."""
+
+    @abstractmethod
+    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
+        """
+        Analyze a billing screenshot and return structured transaction list.
+        :param image_bytes: Raw image file content (PNG/JPEG)
+        :return: List of extracted transactions (may be empty or partial on failure)
+        """
+        pass
--- a/backend/app/services/llm/claude_vision.py
+++ b/backend/app/services/llm/claude_vision.py
@@ -0,0 +1,49 @@
+"""Anthropic Claude Vision provider."""
+
+import base64
+import json
+import re
+from anthropic import AsyncAnthropic
+
+from app.config import get_settings
+from app.schemas.transaction import TransactionExtractItem
+from app.services.llm.base import BaseLLMProvider
+from app.prompts.extract_transaction import get_extract_messages
+from app.services.llm.openai_vision import _parse_json_array
+
+
+class ClaudeVisionProvider(BaseLLMProvider):
+    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
+        settings = get_settings()
+        if not settings.anthropic_api_key:
+            raise ValueError("ANTHROPIC_API_KEY is not set")
+        client = AsyncAnthropic(api_key=settings.anthropic_api_key)
+        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
+        messages = get_extract_messages(b64)
+        # Claude API: user message with content block list
+        user_content = messages[1]["content"]
+        content_blocks = []
+        for block in user_content:
+            if block["type"] == "text":
+                content_blocks.append({"type": "text", "text": block["text"]})
+            elif block["type"] == "image_url":
+                # Claude expects base64 without data URL prefix
+                content_blocks.append({
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "image/jpeg",
+                        "data": block["image_url"]["url"].split(",", 1)[-1],
+                    },
+                })
+        response = await client.messages.create(
+            model=settings.anthropic_model,
+            max_tokens=4096,
+            system=messages[0]["content"],
+            messages=[{"role": "user", "content": content_blocks}],
+        )
+        text = ""
+        for block in response.content:
+            if hasattr(block, "text"):
+                text += block.text
+        return _parse_json_array(text or "[]")
--- a/backend/app/services/llm/custom_openai_vision.py
+++ b/backend/app/services/llm/custom_openai_vision.py
@@ -0,0 +1,32 @@
+"""Custom OpenAI-compatible vision provider."""
+
+import base64
+from openai import AsyncOpenAI
+
+from app.config import get_settings
+from app.schemas.transaction import TransactionExtractItem
+from app.services.llm.base import BaseLLMProvider
+from app.prompts.extract_transaction import get_extract_messages
+from app.services.llm.openai_vision import _parse_json_array
+
+
+class CustomOpenAICompatibleProvider(BaseLLMProvider):
+    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
+        settings = get_settings()
+        if not settings.custom_openai_api_key:
+            raise ValueError("CUSTOM_OPENAI_API_KEY is not set")
+        if not settings.custom_openai_base_url:
+            raise ValueError("CUSTOM_OPENAI_BASE_URL is not set")
+        client = AsyncOpenAI(
+            api_key=settings.custom_openai_api_key,
+            base_url=settings.custom_openai_base_url,
+        )
+        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
+        messages = get_extract_messages(b64)
+        response = await client.chat.completions.create(
+            model=settings.custom_openai_model,
+            messages=messages,
+            max_tokens=4096,
+        )
+        text = response.choices[0].message.content or "[]"
+        return _parse_json_array(text)
--- a/backend/app/services/llm/deepseek_vision.py
+++ b/backend/app/services/llm/deepseek_vision.py
@@ -0,0 +1,34 @@
+"""DeepSeek Vision provider (uses OpenAI-compatible API)."""
+
+import base64
+from openai import AsyncOpenAI
+
+from app.config import get_settings
+from app.schemas.transaction import TransactionExtractItem
+from app.services.llm.base import BaseLLMProvider
+from app.prompts.extract_transaction import get_extract_messages
+from app.services.llm.openai_vision import _parse_json_array
+
+
+# DeepSeek vision endpoint (OpenAI-compatible)
+DEEPSEEK_BASE = "https://api.deepseek.com"
+
+
+class DeepSeekVisionProvider(BaseLLMProvider):
+    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
+        settings = get_settings()
+        if not settings.deepseek_api_key:
+            raise ValueError("DEEPSEEK_API_KEY is not set")
+        client = AsyncOpenAI(
+            api_key=settings.deepseek_api_key,
+            base_url=DEEPSEEK_BASE,
+        )
+        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
+        messages = get_extract_messages(b64)
+        response = await client.chat.completions.create(
+            model=settings.deepseek_model,
+            messages=messages,
+            max_tokens=4096,
+        )
+        text = response.choices[0].message.content or "[]"
+        return _parse_json_array(text)
--- a/backend/app/services/llm/openai_vision.py
+++ b/backend/app/services/llm/openai_vision.py
@@ -0,0 +1,56 @@
+"""OpenAI Vision provider (GPT-4o)."""
+
+import base64
+import json
+import re
+from openai import AsyncOpenAI
+
+from app.config import get_settings
+from app.schemas.transaction import TransactionExtractItem
+from app.services.llm.base import BaseLLMProvider
+from app.prompts.extract_transaction import get_extract_messages
+
+
+class OpenAIVisionProvider(BaseLLMProvider):
+    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
+        settings = get_settings()
+        if not settings.openai_api_key:
+            raise ValueError("OPENAI_API_KEY is not set")
+        client = AsyncOpenAI(api_key=settings.openai_api_key)
+        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
+        messages = get_extract_messages(b64)
+        response = await client.chat.completions.create(
+            model=settings.openai_model,
+            messages=messages,
+            max_tokens=4096,
+        )
+        text = response.choices[0].message.content or "[]"
+        return _parse_json_array(text)
+
+
+def _parse_json_array(text: str) -> list[TransactionExtractItem]:
+    """Parse LLM response into list of TransactionExtractItem. Tolerates markdown and extra text."""
+    text = text.strip()
+    # Remove optional markdown code block
+    if text.startswith("```"):
+        text = re.sub(r"^```(?:json)?\s*", "", text)
+        text = re.sub(r"\s*```\s*$", "", text)
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        return []
+    if not isinstance(data, list):
+        return []
+    result: list[TransactionExtractItem] = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        try:
+            # Normalize transaction_time: allow string -> datetime
+            if isinstance(item.get("transaction_time"), str) and item["transaction_time"]:
+                from dateutil import parser as date_parser
+                item["transaction_time"] = date_parser.isoparse(item["transaction_time"])
+            result.append(TransactionExtractItem.model_validate(item))
+        except Exception:
+            continue
+    return result
--- a/backend/app/services/llm/router.py
+++ b/backend/app/services/llm/router.py
@@ -0,0 +1,22 @@
+"""LLM provider factory - returns provider by config."""
+
+from app.config import get_settings
+from app.services.llm.base import BaseLLMProvider
+from app.services.llm.openai_vision import OpenAIVisionProvider
+from app.services.llm.claude_vision import ClaudeVisionProvider
+from app.services.llm.deepseek_vision import DeepSeekVisionProvider
+from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
+
+
+def get_llm_provider() -> BaseLLMProvider:
+    settings = get_settings()
+    provider = (settings.llm_provider or "openai").lower()
+    if provider == "openai":
+        return OpenAIVisionProvider()
+    if provider == "anthropic":
+        return ClaudeVisionProvider()
+    if provider == "deepseek":
+        return DeepSeekVisionProvider()
+    if provider == "custom_openai":
+        return CustomOpenAICompatibleProvider()
+    return OpenAIVisionProvider()
--- a/backend/app/services/report.py
+++ b/backend/app/services/report.py
@@ -0,0 +1,125 @@
+"""Report generation: Excel and PDF export."""
+
+from io import BytesIO
+from decimal import Decimal
+from datetime import datetime
+
+from openpyxl import Workbook
+from openpyxl.styles import Font, Alignment
+from openpyxl.utils import get_column_letter
+
+# WeasyPrint optional for PDF
+try:
+    from weasyprint import HTML, CSS
+    HAS_WEASYPRINT = True
+except ImportError:
+    HAS_WEASYPRINT = False
+
+
+async def build_excel_report(case, transactions: list) -> bytes:
+    """Build Excel workbook: summary sheet + transaction detail sheet. Returns file bytes."""
+    wb = Workbook()
+    ws_summary = wb.active
+    ws_summary.title = "汇总"
+    ws_summary.append(["案件编号", case.case_number])
+    ws_summary.append(["受害人", case.victim_name])
+    ws_summary.append(["总损失", str(case.total_loss)])
+    ws_summary.append(["交易笔数", len(transactions)])
+    total_out = sum(
+        (t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount)))
+        for t in transactions
+        if t.transaction_type in ("转出", "消费", "付款", "提现") or "转出" in (t.transaction_type or "") or "消费" in (t.transaction_type or "")
+    )
+    total_in = sum(
+        (t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount)))
+        for t in transactions
+        if t.transaction_type in ("转入", "收款", "充值") or "转入" in (t.transaction_type or "") or "收款" in (t.transaction_type or "")
+    )
+    ws_summary.append(["转出合计", str(total_out)])
+    ws_summary.append(["转入合计", str(total_in)])
+    ws_summary.append(["净损失", str(total_out - total_in)])
+    for row in range(1, 8):
+        ws_summary.cell(row=row, column=1).font = Font(bold=True)
+
+    ws_detail = wb.create_sheet("交易明细")
+    headers = ["APP来源", "类型", "金额", "币种", "对方名称", "对方账号", "订单号", "交易时间", "备注", "置信度"]
+    ws_detail.append(headers)
+    for t in transactions:
+        ws_detail.append([
+            t.app_source,
+            t.transaction_type or "",
+            str(t.amount),
+            t.currency or "CNY",
+            t.counterparty_name or "",
+            t.counterparty_account or "",
+            t.order_number or "",
+            t.transaction_time.isoformat() if t.transaction_time else "",
+            t.remark or "",
+            t.confidence or "",
+        ])
+    for col in range(1, len(headers) + 1):
+        ws_detail.cell(row=1, column=col).font = Font(bold=True)
+    for col in range(1, ws_detail.max_column + 1):
+        ws_detail.column_dimensions[get_column_letter(col)].width = 16
+
+    buf = BytesIO()
+    wb.save(buf)
+    buf.seek(0)
+    return buf.getvalue()
+
+
+def _pdf_html(case, transactions: list) -> str:
+    rows = []
+    for t in transactions:
+        time_str = t.transaction_time.strftime("%Y-%m-%d %H:%M") if t.transaction_time else ""
+        rows.append(
+            f"<tr><td>{t.app_source}</td><td>{t.transaction_type or ''}</td><td>{t.amount}</td>"
+            f"<td>{t.counterparty_name or ''}</td><td>{t.counterparty_account or ''}</td><td>{time_str}</td></tr>"
+        )
+    table_rows = "\n".join(rows)
+    return f"""
+<!DOCTYPE html>
+<html>
+<head><meta charset="utf-8"/><title>案件报告</title></head>
+<body>
+<h1>资金追踪报告</h1>
+<p><strong>案件编号：</strong>{case.case_number}</p>
+<p><strong>受害人：</strong>{case.victim_name}</p>
+<p><strong>总损失：</strong>{case.total_loss}</p>
+<p><strong>交易笔数：</strong>{len(transactions)}</p>
+<h2>交易明细</h2>
+<table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
+<thead><tr><th>APP</th><th>类型</th><th>金额</th><th>对方名称</th><th>对方账号</th><th>时间</th></tr></thead>
+<tbody>{table_rows}</tbody>
+</table>
+</body>
+</html>
+"""
+
+
+async def build_pdf_report(case, transactions: list) -> bytes:
+    """Build PDF report. Returns file bytes. Falls back to empty PDF if weasyprint not available."""
+    if not HAS_WEASYPRINT:
+        return b"%PDF-1.4 (WeasyPrint not installed)"
+    html_str = _pdf_html(case, transactions)
+    html = HTML(string=html_str)
+    buf = BytesIO()
+    html.write_pdf(buf)
+    buf.seek(0)
+    return buf.getvalue()
+
+
+async def build_excel_report_path(case, transactions: list, path: str) -> str:
+    """Write Excel to file path; return path."""
+    data = await build_excel_report(case, transactions)
+    with open(path, "wb") as f:
+        f.write(data)
+    return path
+
+
+async def build_pdf_report_path(case, transactions: list, path: str) -> str:
+    """Write PDF to file path; return path."""
+    data = await build_pdf_report(case, transactions)
+    with open(path, "wb") as f:
+        f.write(data)
+    return path