first commit

This commit is contained in:
2026-03-09 14:46:56 +08:00
commit 62236eb80e
63 changed files with 6143 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Services

View File

@@ -0,0 +1,107 @@
"""Fund flow analysis: build directed graph and summary from transactions."""
from collections import defaultdict
from decimal import Decimal
import networkx as nx
from app.schemas.analysis import (
AnalysisSummaryResponse,
AppSummary,
FlowGraphResponse,
FlowNode,
FlowEdge,
)
from app.schemas.transaction import TransactionResponse
# Transaction types that mean money leaving victim's app (outflow)
OUT_TYPES = {"转出", "消费", "付款", "提现"}
# Transaction types that mean money entering victim's app (inflow)
IN_TYPES = {"转入", "收款", "充值"}
def _is_out(t: TransactionResponse) -> bool:
return t.transaction_type in OUT_TYPES or "转出" in (t.transaction_type or "") or "消费" in (t.transaction_type or "")
def _is_in(t: TransactionResponse) -> bool:
return t.transaction_type in IN_TYPES or "转入" in (t.transaction_type or "") or "收款" in (t.transaction_type or "")
def _node_id(app_or_counterparty: str, kind: str) -> str:
"""Generate stable node id; kind in ('victim_app', 'counterparty')."""
import hashlib
safe = (app_or_counterparty or "").strip() or "unknown"
h = hashlib.sha256(f"{kind}:{safe}".encode()).hexdigest()[:12]
return f"{kind}_{h}"
def build_flow_graph(transactions: list[TransactionResponse]) -> tuple[FlowGraphResponse, AnalysisSummaryResponse]:
"""
Build directed graph and summary from transaction list.
Node: victim's app (app_source when outflow) or counterparty (counterparty_name or counterparty_account).
Edge: source -> target with total amount and count.
"""
out_by_app: dict[str, Decimal] = defaultdict(Decimal)
in_by_app: dict[str, Decimal] = defaultdict(Decimal)
total_out = Decimal(0)
total_in = Decimal(0)
counterparties: set[str] = set()
# (source_id, target_id) -> (amount, count)
edges_agg: dict[tuple[str, str], tuple[Decimal, int]] = defaultdict(lambda: (Decimal(0), 0))
node_labels: dict[str, str] = {}
node_types: dict[str, str] = {}
for t in transactions:
amount = t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount))
app = (t.app_source or "").strip() or "未知APP"
counterparty = (t.counterparty_name or t.counterparty_account or "未知对方").strip() or "未知对方"
counterparties.add(counterparty)
victim_node_id = _node_id(app, "victim_app")
node_labels[victim_node_id] = app
node_types[victim_node_id] = "victim_app"
cp_node_id = _node_id(counterparty, "counterparty")
node_labels[cp_node_id] = counterparty
node_types[cp_node_id] = "counterparty"
if _is_out(t):
out_by_app[app] += amount
total_out += amount
key = (victim_node_id, cp_node_id)
am, cnt = edges_agg[key]
edges_agg[key] = (am + amount, cnt + 1)
elif _is_in(t):
in_by_app[app] += amount
total_in += amount
key = (cp_node_id, victim_node_id)
am, cnt = edges_agg[key]
edges_agg[key] = (am + amount, cnt + 1)
all_apps = set(out_by_app.keys()) | set(in_by_app.keys())
by_app = {
app: AppSummary(
in_amount=in_by_app.get(app, Decimal(0)),
out_amount=out_by_app.get(app, Decimal(0)),
)
for app in all_apps
}
summary = AnalysisSummaryResponse(
total_out=total_out,
total_in=total_in,
net_loss=total_out - total_in,
by_app=by_app,
counterparty_count=len(counterparties),
)
nodes = [
FlowNode(id=nid, label=node_labels[nid], type=node_types.get(nid))
for nid in node_labels
]
edges = [
FlowEdge(source=src, target=tgt, amount=am, count=cnt)
for (src, tgt), (am, cnt) in edges_agg.items()
]
graph = FlowGraphResponse(nodes=nodes, edges=edges)
return graph, summary

View File

@@ -0,0 +1,42 @@
"""Transaction data extraction: LLM Vision + persistence."""
from app.models import Transaction
from app.models.database import async_session_maker
from app.schemas.transaction import TransactionExtractItem, TransactionResponse
from app.services.llm import get_llm_provider
async def extract_and_save(
case_id: int,
screenshot_id: int,
image_bytes: bytes,
) -> list[TransactionResponse]:
"""
Run vision extraction on image and persist transactions to DB.
Returns list of created transactions; low-confidence items are still saved but flagged.
"""
provider = get_llm_provider()
items: list[TransactionExtractItem] = await provider.extract_from_image(image_bytes)
results: list[TransactionResponse] = []
async with async_session_maker() as session:
for it in items:
t = Transaction(
case_id=case_id,
screenshot_id=screenshot_id,
app_source=it.app_source,
transaction_type=it.transaction_type,
amount=it.amount,
currency=it.currency or "CNY",
counterparty_name=it.counterparty_name,
counterparty_account=it.counterparty_account,
order_number=it.order_number,
transaction_time=it.transaction_time,
remark=it.remark,
confidence=it.confidence if it.confidence in ("high", "medium", "low") else "medium",
raw_text=None,
)
session.add(t)
await session.flush()
results.append(TransactionResponse.model_validate(t))
await session.commit()
return results

View File

@@ -0,0 +1,16 @@
# LLM providers
from app.services.llm.base import BaseLLMProvider
from app.services.llm.router import get_llm_provider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
__all__ = [
"BaseLLMProvider",
"get_llm_provider",
"OpenAIVisionProvider",
"ClaudeVisionProvider",
"DeepSeekVisionProvider",
"CustomOpenAICompatibleProvider",
]

View File

@@ -0,0 +1,18 @@
"""Base LLM provider - abstract interface for vision extraction."""
from abc import ABC, abstractmethod
from app.schemas.transaction import TransactionExtractItem
class BaseLLMProvider(ABC):
"""Abstract base for LLM vision providers. Each provider implements extract_from_image."""
@abstractmethod
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
"""
Analyze a billing screenshot and return structured transaction list.
:param image_bytes: Raw image file content (PNG/JPEG)
:return: List of extracted transactions (may be empty or partial on failure)
"""
pass

View File

@@ -0,0 +1,49 @@
"""Anthropic Claude Vision provider."""
import base64
import json
import re
from anthropic import AsyncAnthropic
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class ClaudeVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.anthropic_api_key:
raise ValueError("ANTHROPIC_API_KEY is not set")
client = AsyncAnthropic(api_key=settings.anthropic_api_key)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
# Claude API: user message with content block list
user_content = messages[1]["content"]
content_blocks = []
for block in user_content:
if block["type"] == "text":
content_blocks.append({"type": "text", "text": block["text"]})
elif block["type"] == "image_url":
# Claude expects base64 without data URL prefix
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": block["image_url"]["url"].split(",", 1)[-1],
},
})
response = await client.messages.create(
model=settings.anthropic_model,
max_tokens=4096,
system=messages[0]["content"],
messages=[{"role": "user", "content": content_blocks}],
)
text = ""
for block in response.content:
if hasattr(block, "text"):
text += block.text
return _parse_json_array(text or "[]")

View File

@@ -0,0 +1,32 @@
"""Custom OpenAI-compatible vision provider."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class CustomOpenAICompatibleProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.custom_openai_api_key:
raise ValueError("CUSTOM_OPENAI_API_KEY is not set")
if not settings.custom_openai_base_url:
raise ValueError("CUSTOM_OPENAI_BASE_URL is not set")
client = AsyncOpenAI(
api_key=settings.custom_openai_api_key,
base_url=settings.custom_openai_base_url,
)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.custom_openai_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)

View File

@@ -0,0 +1,34 @@
"""DeepSeek Vision provider (uses OpenAI-compatible API)."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
# DeepSeek vision endpoint (OpenAI-compatible)
DEEPSEEK_BASE = "https://api.deepseek.com"
class DeepSeekVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.deepseek_api_key:
raise ValueError("DEEPSEEK_API_KEY is not set")
client = AsyncOpenAI(
api_key=settings.deepseek_api_key,
base_url=DEEPSEEK_BASE,
)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.deepseek_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)

View File

@@ -0,0 +1,56 @@
"""OpenAI Vision provider (GPT-4o)."""
import base64
import json
import re
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
class OpenAIVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.openai_api_key:
raise ValueError("OPENAI_API_KEY is not set")
client = AsyncOpenAI(api_key=settings.openai_api_key)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.openai_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)
def _parse_json_array(text: str) -> list[TransactionExtractItem]:
"""Parse LLM response into list of TransactionExtractItem. Tolerates markdown and extra text."""
text = text.strip()
# Remove optional markdown code block
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```\s*$", "", text)
try:
data = json.loads(text)
except json.JSONDecodeError:
return []
if not isinstance(data, list):
return []
result: list[TransactionExtractItem] = []
for item in data:
if not isinstance(item, dict):
continue
try:
# Normalize transaction_time: allow string -> datetime
if isinstance(item.get("transaction_time"), str) and item["transaction_time"]:
from dateutil import parser as date_parser
item["transaction_time"] = date_parser.isoparse(item["transaction_time"])
result.append(TransactionExtractItem.model_validate(item))
except Exception:
continue
return result

View File

@@ -0,0 +1,22 @@
"""LLM provider factory - returns provider by config."""
from app.config import get_settings
from app.services.llm.base import BaseLLMProvider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
def get_llm_provider() -> BaseLLMProvider:
settings = get_settings()
provider = (settings.llm_provider or "openai").lower()
if provider == "openai":
return OpenAIVisionProvider()
if provider == "anthropic":
return ClaudeVisionProvider()
if provider == "deepseek":
return DeepSeekVisionProvider()
if provider == "custom_openai":
return CustomOpenAICompatibleProvider()
return OpenAIVisionProvider()

View File

@@ -0,0 +1,125 @@
"""Report generation: Excel and PDF export."""
from io import BytesIO
from decimal import Decimal
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment
from openpyxl.utils import get_column_letter
# WeasyPrint optional for PDF
try:
from weasyprint import HTML, CSS
HAS_WEASYPRINT = True
except ImportError:
HAS_WEASYPRINT = False
async def build_excel_report(case, transactions: list) -> bytes:
"""Build Excel workbook: summary sheet + transaction detail sheet. Returns file bytes."""
wb = Workbook()
ws_summary = wb.active
ws_summary.title = "汇总"
ws_summary.append(["案件编号", case.case_number])
ws_summary.append(["受害人", case.victim_name])
ws_summary.append(["总损失", str(case.total_loss)])
ws_summary.append(["交易笔数", len(transactions)])
total_out = sum(
(t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount)))
for t in transactions
if t.transaction_type in ("转出", "消费", "付款", "提现") or "转出" in (t.transaction_type or "") or "消费" in (t.transaction_type or "")
)
total_in = sum(
(t.amount if isinstance(t.amount, Decimal) else Decimal(str(t.amount)))
for t in transactions
if t.transaction_type in ("转入", "收款", "充值") or "转入" in (t.transaction_type or "") or "收款" in (t.transaction_type or "")
)
ws_summary.append(["转出合计", str(total_out)])
ws_summary.append(["转入合计", str(total_in)])
ws_summary.append(["净损失", str(total_out - total_in)])
for row in range(1, 8):
ws_summary.cell(row=row, column=1).font = Font(bold=True)
ws_detail = wb.create_sheet("交易明细")
headers = ["APP来源", "类型", "金额", "币种", "对方名称", "对方账号", "订单号", "交易时间", "备注", "置信度"]
ws_detail.append(headers)
for t in transactions:
ws_detail.append([
t.app_source,
t.transaction_type or "",
str(t.amount),
t.currency or "CNY",
t.counterparty_name or "",
t.counterparty_account or "",
t.order_number or "",
t.transaction_time.isoformat() if t.transaction_time else "",
t.remark or "",
t.confidence or "",
])
for col in range(1, len(headers) + 1):
ws_detail.cell(row=1, column=col).font = Font(bold=True)
for col in range(1, ws_detail.max_column + 1):
ws_detail.column_dimensions[get_column_letter(col)].width = 16
buf = BytesIO()
wb.save(buf)
buf.seek(0)
return buf.getvalue()
def _pdf_html(case, transactions: list) -> str:
rows = []
for t in transactions:
time_str = t.transaction_time.strftime("%Y-%m-%d %H:%M") if t.transaction_time else ""
rows.append(
f"<tr><td>{t.app_source}</td><td>{t.transaction_type or ''}</td><td>{t.amount}</td>"
f"<td>{t.counterparty_name or ''}</td><td>{t.counterparty_account or ''}</td><td>{time_str}</td></tr>"
)
table_rows = "\n".join(rows)
return f"""
<!DOCTYPE html>
<html>
<head><meta charset="utf-8"/><title>案件报告</title></head>
<body>
<h1>资金追踪报告</h1>
<p><strong>案件编号:</strong>{case.case_number}</p>
<p><strong>受害人:</strong>{case.victim_name}</p>
<p><strong>总损失:</strong>{case.total_loss}</p>
<p><strong>交易笔数:</strong>{len(transactions)}</p>
<h2>交易明细</h2>
<table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
<thead><tr><th>APP</th><th>类型</th><th>金额</th><th>对方名称</th><th>对方账号</th><th>时间</th></tr></thead>
<tbody>{table_rows}</tbody>
</table>
</body>
</html>
"""
async def build_pdf_report(case, transactions: list) -> bytes:
"""Build PDF report. Returns file bytes. Falls back to empty PDF if weasyprint not available."""
if not HAS_WEASYPRINT:
return b"%PDF-1.4 (WeasyPrint not installed)"
html_str = _pdf_html(case, transactions)
html = HTML(string=html_str)
buf = BytesIO()
html.write_pdf(buf)
buf.seek(0)
return buf.getvalue()
async def build_excel_report_path(case, transactions: list, path: str) -> str:
"""Write Excel to file path; return path."""
data = await build_excel_report(case, transactions)
with open(path, "wb") as f:
f.write(data)
return path
async def build_pdf_report_path(case, transactions: list, path: str) -> str:
"""Write PDF to file path; return path."""
data = await build_pdf_report(case, transactions)
with open(path, "wb") as f:
f.write(data)
return path