first commit

This commit is contained in:
2026-03-11 16:28:04 +08:00
commit c0f9ddabbf
101 changed files with 11601 additions and 0 deletions

View File

View File

@@ -0,0 +1,42 @@
"""Orchestrates the full analysis pipeline: matching -> flow -> assessment."""
from uuid import UUID
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.case import Case, CaseStatus
from app.services.matching_service import run_matching
from app.services.assessment_service import assess_case
from app.services.case_service import recalculate_case_total
async def run_analysis_sync(case_id: UUID, db: AsyncSession) -> None:
    """Run the full analysis pipeline synchronously (fallback when Celery is down).

    Stages: matching/dedup -> assessment -> total recalculation. The case
    status is set to ``analyzing`` while work is in progress and to
    ``reviewing`` once every stage has completed.
    """
    case = await db.get(Case, case_id)
    if case is None:
        return

    case.status = CaseStatus.analyzing
    await db.flush()

    # Stage 1: deduplicate transactions and mark self-transfers.
    known_accounts = _extract_self_accounts(case)
    await run_matching(case_id, known_accounts, db)

    # Stage 2: rule-based fraud assessment on the surviving transactions.
    await assess_case(case_id, db)

    # Stage 3: refresh the case's confirmed total amount.
    await recalculate_case_total(case_id, db)

    case.status = CaseStatus.reviewing
    await db.flush()
def _extract_self_accounts(case: Case) -> list[str]:
"""Extract known self-account identifiers from case context.
In a full implementation this would come from user input or a
dedicated 'victim accounts' table. For now we return an empty list
and rely on heuristic rules.
"""
return []

View File

@@ -0,0 +1,150 @@
"""Fraud amount assessment and inquiry suggestion generation."""
import logging
from uuid import UUID
import httpx
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.models.transaction import TransactionRecord
from app.models.assessment import FraudAssessment, ConfidenceLevel, ReviewStatus
from app.rules.assessment_rules import classify_transaction
logger = logging.getLogger(__name__)
async def assess_case(case_id: UUID, db: AsyncSession) -> list[FraudAssessment]:
    """Run rule-based assessment on all non-duplicate transactions and generate reasons.

    Creates one pending FraudAssessment per surviving transaction; when an LLM
    endpoint is configured, each reason is optionally rewritten for readability
    (failures are logged at debug level and silently ignored).
    """
    rows = await db.execute(
        select(TransactionRecord)
        .where(TransactionRecord.case_id == case_id)
        .where(TransactionRecord.is_duplicate.is_(False))
        .order_by(TransactionRecord.trade_time.asc())
    )
    txs = list(rows.scalars().all())

    created: list[FraudAssessment] = []
    for record in txs:
        level, reason, exclude_reason = classify_transaction(record)
        # Low-confidence transactions are recorded but contribute no amount.
        amount = float(record.amount) if level != ConfidenceLevel.low else 0
        assessment = FraudAssessment(
            case_id=case_id,
            transaction_id=record.id,
            confidence_level=level,
            assessed_amount=amount,
            reason=reason,
            exclude_reason=exclude_reason,
            review_status=ReviewStatus.pending,
        )
        db.add(assessment)
        created.append(assessment)
    await db.flush()

    # Best-effort: polish the wording of each reason via the configured LLM.
    if settings.LLM_API_KEY and settings.LLM_API_URL:
        for assessment in created:
            try:
                better = await _enhance_reason_via_llm(assessment, txs)
            except Exception as e:
                logger.debug("LLM reason enhancement skipped: %s", e)
            else:
                if better:
                    assessment.reason = better
    await db.flush()
    return created
async def generate_inquiry_suggestions(case_id: UUID, db: AsyncSession) -> list[str]:
    """Generate interview / inquiry suggestions based on assessment results.

    Prefers LLM-generated suggestions when configured; any LLM failure falls
    back silently to the rule-based generator.
    """
    rows = await db.execute(
        select(FraudAssessment)
        .where(FraudAssessment.case_id == case_id)
        .order_by(FraudAssessment.created_at.asc())
    )
    findings = list(rows.scalars().all())
    if not findings:
        return ["暂无分析结果,请先执行案件分析。"]
    if settings.LLM_API_KEY and settings.LLM_API_URL:
        try:
            return await _generate_suggestions_via_llm(findings)
        except Exception as e:
            logger.debug("LLM suggestions skipped: %s", e)
    return _generate_suggestions_rule_based(findings)
def _generate_suggestions_rule_based(assessments: list[FraudAssessment]) -> list[str]:
    """Build inquiry prompts from simple statistics over the assessments.

    Two conditional tips (pending items, medium-confidence items) followed by
    three fixed catch-all questions.
    """
    tips: list[str] = []
    pending_count = sum(1 for a in assessments if a.review_status == ReviewStatus.pending)
    has_medium = any(a.confidence_level == ConfidenceLevel.medium for a in assessments)
    if pending_count:
        tips.append(
            f"{pending_count} 笔交易尚未确认,建议逐笔向受害人核实是否受到诱导操作。"
        )
    if has_medium:
        tips.append(
            "部分交易置信度为中等,建议追问受害人交易的具体背景和对方的诱导话术。"
        )
    tips.append("是否还有其他未截图的转账记录或 APP 需要补充?")
    tips.append("涉案金额中是否有已部分追回或返还的款项?")
    tips.append(
        "除了截图所示的 APP 外是否还存在银行柜台、ATM、其他支付平台等转账渠道"
    )
    return tips
async def _enhance_reason_via_llm(fa: FraudAssessment, all_tx: list) -> str | None:
    """Ask the LLM to rewrite one assessment reason for use in case documents.

    Raises on HTTP errors; callers are expected to catch and fall back to the
    original reason text.
    """
    prompt = (
        f"这笔交易金额{fa.assessed_amount}元,置信等级{fa.confidence_level.value}"
        f"原始认定理由:{fa.reason}"
        "请用简洁中文优化认定理由表述,使之适合出现在办案文书中。只返回优化后的理由文字。"
    )
    payload = {
        "model": settings.LLM_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 300,
    }
    headers = {"Authorization": f"Bearer {settings.LLM_API_KEY}"}
    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.post(settings.LLM_API_URL, headers=headers, json=payload)
        resp.raise_for_status()
        return resp.json()["choices"][0]["message"]["content"].strip()
async def _generate_suggestions_via_llm(assessments: list[FraudAssessment]) -> list[str]:
    """Ask the LLM for five inquiry suggestions based on an assessment summary.

    Raises on HTTP or JSON-parsing failures; callers fall back to the
    rule-based generator.
    """
    import json

    lines = [
        f"- 金额{a.assessed_amount}元, 置信{a.confidence_level.value}, "
        f"状态{a.review_status.value}, 理由: {a.reason[:60]}"
        for a in assessments
    ]
    summary = "\n".join(lines)
    prompt = (
        "你是一名反诈案件办案助手。以下是某诈骗案件的交易认定摘要:\n"
        f"{summary}\n\n"
        "请生成5条笔录辅助问询建议帮助民警追问受害人以完善证据链。"
        "只返回JSON数组格式的5个字符串。"
    )
    payload = {
        "model": settings.LLM_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 600,
    }
    async with httpx.AsyncClient(timeout=20) as client:
        resp = await client.post(
            settings.LLM_API_URL,
            headers={"Authorization": f"Bearer {settings.LLM_API_KEY}"},
            json=payload,
        )
        resp.raise_for_status()
        raw = resp.json()["choices"][0]["message"]["content"].strip()
    # Strip markdown code fences (```json ... ```) before parsing.
    return json.loads(raw.strip().strip("`").removeprefix("json").strip())

View File

@@ -0,0 +1,23 @@
from uuid import UUID
from decimal import Decimal
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.case import Case
from app.models.assessment import FraudAssessment, ReviewStatus
async def recalculate_case_total(case_id: UUID, db: AsyncSession) -> float:
    """Recalculate and persist the total confirmed fraud amount for a case.

    Sums ``assessed_amount`` over confirmed assessments only; writes the
    result back to the case when it exists and returns the total either way.
    """
    row = await db.execute(
        select(func.coalesce(func.sum(FraudAssessment.assessed_amount), 0))
        .where(FraudAssessment.case_id == case_id)
        .where(FraudAssessment.review_status == ReviewStatus.confirmed)
    )
    total = float(row.scalar() or 0)
    case = await db.get(Case, case_id)
    if case is not None:
        case.total_amount = total
        await db.flush()
    return total

View File

@@ -0,0 +1,72 @@
"""Build the fund-flow graph from deduplicated transactions."""
from uuid import UUID
from collections import defaultdict
from sqlalchemy.ext.asyncio import AsyncSession
from app.repositories.transaction_repo import TransactionRepository
from app.schemas.transaction import FlowGraphOut, FlowNodeOut, FlowEdgeOut
async def build_flow_graph(case_id: UUID, db: AsyncSession) -> FlowGraphOut:
    """Aggregate non-duplicate transactions into a fund-flow graph.

    Nodes are account labels (the victim's own accounts plus counterparties);
    edges are directed money flows keyed by (source, target) with summed
    amount, transaction count, and the first-seen trade time.
    """
    repo = TransactionRepository(db)
    transactions = await repo.get_all_by_case(case_id)
    # Duplicates were flagged by the matching pipeline; skip them here.
    valid = [tx for tx in transactions if not tx.is_duplicate]
    nodes_map: dict[str, str] = {}  # label -> type
    edge_agg: dict[tuple[str, str], dict] = defaultdict(
        lambda: {"amount": 0.0, "count": 0, "trade_time": ""}
    )
    for tx in valid:
        self_label = _self_label(tx)
        counter_label = tx.counterparty_name or "未知对手方"
        if self_label not in nodes_map:
            nodes_map[self_label] = "self"
        if counter_label not in nodes_map:
            # NOTE(review): the node type is fixed by the first transaction seen
            # for this counterparty; later transactions with a different
            # is_transit flag do not change it — confirm that is intended.
            nodes_map[counter_label] = "suspect" if not tx.is_transit else "transit"
        # Edge direction follows money: outgoing leaves the self node.
        if tx.direction.value == "out":
            key = (self_label, counter_label)
        else:
            key = (counter_label, self_label)
        edge_agg[key]["amount"] += float(tx.amount)
        edge_agg[key]["count"] += 1
        time_str = tx.trade_time.strftime("%Y-%m-%d %H:%M") if tx.trade_time else ""
        # Keep only the first-seen timestamp per edge (ordering depends on
        # the repository's query order).
        if not edge_agg[key]["trade_time"]:
            edge_agg[key]["trade_time"] = time_str
    # Assign stable sequential ids, then translate edge endpoints to ids.
    nodes = [
        FlowNodeOut(id=f"n-{i}", label=label, type=ntype)
        for i, (label, ntype) in enumerate(nodes_map.items())
    ]
    label_to_id = {n.label: n.id for n in nodes}
    edges = [
        FlowEdgeOut(
            source=label_to_id[src],
            target=label_to_id[tgt],
            amount=info["amount"],
            count=info["count"],
            trade_time=info["trade_time"],
        )
        for (src, tgt), info in edge_agg.items()
    ]
    return FlowGraphOut(nodes=nodes, edges=edges)
def _self_label(tx) -> str:
app_names = {
"wechat": "微信支付",
"alipay": "支付宝",
"bank": f"银行卡({tx.self_account_tail_no})" if tx.self_account_tail_no else "银行卡",
"digital_wallet": "数字钱包",
"other": "其他账户",
}
return app_names.get(tx.source_app.value, "未知账户")

View File

@@ -0,0 +1,24 @@
"""Image post-processing helpers (thumbnail generation, etc.)."""
from pathlib import Path
from PIL import Image
from app.core.config import settings
def generate_thumbnail(file_path: str, max_size: int = 400) -> str:
    """Create a thumbnail under a ``thumbs/`` subdirectory next to the image.

    Returns the thumbnail path relative to the upload root, or the original
    *file_path* unchanged when the source is missing or processing fails —
    best-effort by design, so a bad image never breaks the caller.
    """
    source = settings.upload_path / file_path
    if not source.exists():
        return file_path
    thumb_dir = source.parent / "thumbs"
    thumb_dir.mkdir(exist_ok=True)
    target = thumb_dir / source.name
    try:
        with Image.open(source) as img:
            img.thumbnail((max_size, max_size))
            img.save(target)
        return str(target.relative_to(settings.upload_path))
    except Exception:
        # Deliberate best-effort fallback: serve the full-size image instead.
        return file_path

View File

@@ -0,0 +1,83 @@
"""Transaction deduplication and matching engine.
Multi-layer strategy:
1. Exact order_no match
2. Amount + time-window + account-tail match
3. Fuzzy text similarity (placeholder for LLM-assisted matching)
"""
from uuid import UUID
from datetime import timedelta
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.transaction import TransactionRecord
from app.models.transaction_cluster import TransactionCluster
from app.repositories.transaction_repo import TransactionRepository
from app.rules.dedup_rules import is_duplicate_pair
from app.rules.transit_rules import is_self_transfer
async def run_matching(case_id: UUID, self_accounts: list[str], db: AsyncSession) -> None:
    """Execute the full dedup + transit-marking pipeline for a case.

    Layers 1 & 2: greedy pairwise clustering of duplicate records; the record
    with the highest extraction confidence in each cluster stays primary and
    the rest are flagged ``is_duplicate``. Layer 3: surviving records that
    look like transfers between the victim's own accounts are flagged
    ``is_transit``.

    Args:
        case_id: Case whose transactions are processed.
        self_accounts: Known victim-owned account identifiers.
        db: Active async session; changes are flushed, not committed.
    """
    repo = TransactionRepository(db)
    transactions = await repo.get_all_by_case(case_id)
    if not transactions:
        return

    # Reset flags so re-running matching is idempotent.
    for tx in transactions:
        tx.is_duplicate = False
        tx.is_transit = False
        tx.cluster_id = None

    # ── Layer 1 & 2: dedup (greedy O(n^2) pairwise comparison) ──
    matched: set[UUID] = set()
    for i, tx_a in enumerate(transactions):
        if tx_a.id in matched:
            continue
        group = [tx_a]
        for tx_b in transactions[i + 1:]:
            if tx_b.id in matched:
                continue
            if is_duplicate_pair(tx_a, tx_b):
                group.append(tx_b)
                matched.add(tx_b.id)
        if len(group) > 1:
            # Keep the most confidently extracted record as the primary.
            primary = max(group, key=lambda t: t.confidence)
            cluster = TransactionCluster(
                case_id=case_id,
                primary_tx_id=primary.id,
                match_reason=_match_reason(primary, group),
            )
            db.add(cluster)
            await db.flush()  # flush to obtain cluster.id for the members
            for tx in group:
                tx.cluster_id = cluster.id
                if tx.id != primary.id:
                    tx.is_duplicate = True

    # ── Layer 3: transit detection on non-duplicate records ──
    for tx in transactions:
        if tx.is_duplicate:
            continue
        if is_self_transfer(tx, self_accounts):
            tx.is_transit = True
    await db.flush()
def _match_reason(primary: TransactionRecord, group: list[TransactionRecord]) -> str:
reasons: list[str] = []
orders = {tx.order_no for tx in group if tx.order_no}
if len(orders) == 1:
reasons.append("订单号一致")
amounts = {float(tx.amount) for tx in group}
if len(amounts) == 1:
reasons.append("金额一致")
return "; ".join(reasons) if reasons else "时间和金额近似"

View File

@@ -0,0 +1,145 @@
"""OCR and multimodal extraction service.
Wraps calls to cloud OCR / multimodal APIs with a provider-agnostic interface.
When API keys are not configured, falls back to a mock implementation that
returns placeholder data (sufficient for demo / competition).
"""
import json
import logging
from pathlib import Path
import httpx
from app.core.config import settings
from app.models.evidence_image import SourceApp, PageType
logger = logging.getLogger(__name__)
# ── provider-agnostic interface ──────────────────────────────────────────
async def classify_page(image_path: str) -> tuple[SourceApp, PageType]:
    """Identify the source app and page type of a screenshot.

    Uses the multimodal API when credentials are configured; otherwise a
    filename-based mock classifier.
    """
    api_configured = bool(settings.LLM_API_KEY and settings.LLM_API_URL)
    if not api_configured:
        return _classify_mock(image_path)
    return await _classify_via_api(image_path)
async def extract_transaction_fields(image_path: str, source_app: SourceApp, page_type: PageType) -> dict:
    """Extract structured transaction fields from a screenshot.

    Delegates to the multimodal API when configured, otherwise to the mock
    extractor used for demos.
    """
    if not (settings.LLM_API_KEY and settings.LLM_API_URL):
        return _extract_mock(image_path, source_app, page_type)
    return await _extract_via_api(image_path, source_app, page_type)
# ── real API implementation ──────────────────────────────────────────────
async def _classify_via_api(image_path: str) -> tuple[SourceApp, PageType]:
    """Classify a screenshot via the multimodal API.

    Any failure (missing file, HTTP error, bad JSON, unknown enum value)
    degrades to ``(SourceApp.other, PageType.unknown)``.
    """
    import base64

    full_path = settings.upload_path / image_path
    if not full_path.exists():
        return SourceApp.other, PageType.unknown
    image_b64 = base64.b64encode(full_path.read_bytes()).decode()
    prompt = (
        "请分析这张手机截图判断它来自哪个APPwechat/alipay/bank/digital_wallet/other"
        "以及页面类型bill_list/bill_detail/transfer_receipt/sms_notice/balance/unknown"
        "只返回JSON: {\"source_app\": \"...\", \"page_type\": \"...\"}"
    )
    payload = {
        "model": settings.LLM_MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
                ],
            }
        ],
        "max_tokens": 200,
    }
    try:
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.post(
                settings.LLM_API_URL,
                headers={"Authorization": f"Bearer {settings.LLM_API_KEY}"},
                json=payload,
            )
            resp.raise_for_status()
            text = resp.json()["choices"][0]["message"]["content"]
        # Strip possible ```json fencing before parsing.
        data = json.loads(text.strip().strip("`").removeprefix("json").strip())
        return SourceApp(data.get("source_app", "other")), PageType(data.get("page_type", "unknown"))
    except Exception as e:
        logger.warning("classify_page API failed: %s", e)
        return SourceApp.other, PageType.unknown
async def _extract_via_api(image_path: str, source_app: SourceApp, page_type: PageType) -> dict:
    """Extract transaction fields from a screenshot via the multimodal API.

    Returns the parsed JSON (a dict, or a list for multi-transaction
    screenshots); an empty dict on any failure.
    """
    import base64

    full_path = settings.upload_path / image_path
    if not full_path.exists():
        return {}
    image_b64 = base64.b64encode(full_path.read_bytes()).decode()
    prompt = (
        f"这是一张来自{source_app.value}{page_type.value}截图。"
        "请提取其中的交易信息返回JSON格式字段包括"
        "trade_time(交易时间,格式YYYY-MM-DD HH:MM:SS), amount(金额,数字), "
        "direction(in或out), counterparty_name(对方名称), counterparty_account(对方账号), "
        "self_account_tail_no(本方账户尾号), order_no(订单号), remark(备注), confidence(0-1)。"
        "如果截图包含多笔交易返回JSON数组。否则返回单个JSON对象。"
    )
    message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ],
    }
    try:
        async with httpx.AsyncClient(timeout=60) as client:
            resp = await client.post(
                settings.LLM_API_URL,
                headers={"Authorization": f"Bearer {settings.LLM_API_KEY}"},
                json={
                    "model": settings.LLM_MODEL,
                    "messages": [message],
                    "max_tokens": 2000,
                },
            )
            resp.raise_for_status()
            text = resp.json()["choices"][0]["message"]["content"]
        # Strip possible ```json fencing before parsing.
        return json.loads(text.strip().strip("`").removeprefix("json").strip())
    except Exception as e:
        logger.warning("extract_transaction_fields API failed: %s", e)
        return {}
# ── mock fallback ────────────────────────────────────────────────────────
def _classify_mock(image_path: str) -> tuple[SourceApp, PageType]:
    """Filename-based heuristic classification used when no API key is set."""
    name = image_path.lower()
    if any(tag in name for tag in ("wechat", "wx")):
        return SourceApp.wechat, PageType.bill_detail
    if any(tag in name for tag in ("alipay", "ali")):
        return SourceApp.alipay, PageType.bill_list
    if "bank" in name:
        return SourceApp.bank, PageType.bill_detail
    return SourceApp.other, PageType.unknown
def _extract_mock(image_path: str, source_app: SourceApp, page_type: PageType) -> dict:
return {
"trade_time": "2026-03-08 10:00:00",
"amount": 1000.00,
"direction": "out",
"counterparty_name": "模拟对手方",
"counterparty_account": "",
"self_account_tail_no": "",
"order_no": f"MOCK-{hash(image_path) % 100000:05d}",
"remark": "模拟交易",
"confidence": 0.80,
}

View File

@@ -0,0 +1,47 @@
"""Parse raw OCR / multimodal extraction results into TransactionRecord instances."""
from datetime import datetime
from uuid import UUID
from app.models.transaction import TransactionRecord, Direction
from app.models.evidence_image import SourceApp
def parse_extracted_fields(
    raw: dict | list,
    case_id: UUID,
    image_id: UUID,
    source_app: SourceApp,
) -> list[TransactionRecord]:
    """Convert raw extraction dict(s) into TransactionRecord ORM objects.

    Accepts either a single dict or a list of dicts. Items that are empty or
    have a missing/zero amount are skipped. A missing or unparseable
    ``trade_time`` falls back to the current local time so downstream
    ordering still works.
    """
    items = raw if isinstance(raw, list) else [raw]
    records: list[TransactionRecord] = []
    for item in items:
        if not item or not item.get("amount"):
            continue
        try:
            trade_time = datetime.fromisoformat(item["trade_time"])
        except (ValueError, KeyError, TypeError):
            # TypeError covers trade_time being None or a non-string value,
            # which the upstream extractor can emit; previously this escaped
            # the handler and crashed parsing.
            trade_time = datetime.now()
        direction_str = item.get("direction", "out")
        direction = Direction.in_ if direction_str == "in" else Direction.out
        record = TransactionRecord(
            case_id=case_id,
            evidence_image_id=image_id,
            source_app=source_app,
            trade_time=trade_time,
            amount=float(item.get("amount", 0)),
            direction=direction,
            counterparty_name=str(item.get("counterparty_name", "")),
            counterparty_account=str(item.get("counterparty_account", "")),
            self_account_tail_no=str(item.get("self_account_tail_no", "")),
            order_no=str(item.get("order_no", "")),
            remark=str(item.get("remark", "")),
            confidence=float(item.get("confidence", 0.5)),
        )
        records.append(record)
    return records

View File

@@ -0,0 +1,156 @@
"""Report generation: Excel / Word / PDF."""
import uuid
from pathlib import Path
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.models.assessment import FraudAssessment, ReviewStatus
from app.models.transaction import TransactionRecord
from app.models.report import ExportReport, ReportType
from app.schemas.report import ReportCreate
async def generate_report(case_id: UUID, body: ReportCreate, db: AsyncSession) -> ExportReport:
    """Generate a report file for *case_id* and record it as an ExportReport row.

    The version number is one plus the count of existing reports of the same
    type. NOTE(review): two concurrent generations could compute the same
    version — confirm whether a uniqueness constraint guards this.
    """
    # Count prior reports of this type to derive the next version number.
    result = await db.execute(
        select(ExportReport)
        .where(ExportReport.case_id == case_id, ExportReport.report_type == body.report_type)
    )
    existing = list(result.scalars().all())
    version = len(existing) + 1
    report_dir = settings.upload_path / str(case_id) / "reports"
    report_dir.mkdir(parents=True, exist_ok=True)
    # Dispatch on the requested format; any type other than excel/word gets
    # the PDF placeholder.
    if body.report_type == ReportType.excel:
        file_path = await _gen_excel(case_id, report_dir, db)
    elif body.report_type == ReportType.word:
        file_path = await _gen_word(case_id, report_dir, db)
    else:
        file_path = await _gen_pdf_placeholder(case_id, report_dir)
    # Store the path relative to the upload root so the tree is relocatable.
    relative = str(file_path.relative_to(settings.upload_path))
    # snapshot confirmed assessments
    snap_result = await db.execute(
        select(FraudAssessment).where(
            FraudAssessment.case_id == case_id,
            FraudAssessment.review_status == ReviewStatus.confirmed,
        )
    )
    # Persist a JSON snapshot of what was confirmed at generation time, so the
    # report row stays meaningful even if assessments change later.
    snapshot = [
        {"amount": float(a.assessed_amount), "reason": a.reason}
        for a in snap_result.scalars().all()
    ]
    report = ExportReport(
        case_id=case_id,
        report_type=body.report_type,
        file_path=relative,
        version=version,
        content_snapshot={"assessments": snapshot},
    )
    db.add(report)
    await db.flush()
    await db.refresh(report)
    return report
async def _gen_excel(case_id: UUID, report_dir: Path, db: AsyncSession) -> Path:
    """Render the case into an .xlsx workbook and return the file path.

    Sheet 1 lists only confirmed assessments joined to their transactions;
    Sheet 2 dumps every transaction for the case, duplicates included.
    """
    from openpyxl import Workbook
    wb = Workbook()
    # Sheet 1: Summary
    ws = wb.active
    ws.title = "被骗金额汇总"
    ws.append(["交易时间", "金额(元)", "方向", "对方", "来源APP", "备注", "置信度", "认定理由"])
    assessments_result = await db.execute(
        select(FraudAssessment).where(
            FraudAssessment.case_id == case_id,
            FraudAssessment.review_status == ReviewStatus.confirmed,
        )
    )
    # NOTE(review): one db.get per assessment (N+1 queries) — fine for small
    # cases; consider a join if report generation becomes slow.
    for a in assessments_result.scalars().all():
        tx = await db.get(TransactionRecord, a.transaction_id)
        if tx:
            ws.append([
                tx.trade_time.strftime("%Y-%m-%d %H:%M:%S"),
                float(a.assessed_amount),
                "支出" if tx.direction.value == "out" else "收入",
                tx.counterparty_name,
                tx.source_app.value,
                tx.remark,
                tx.confidence,
                a.reason[:100],  # cap reason length to keep cells readable
            ])
    # Sheet 2: All transactions
    ws2 = wb.create_sheet("交易明细")
    ws2.append(["交易时间", "金额", "方向", "对方", "来源", "订单号", "是否重复", "是否中转"])
    tx_result = await db.execute(
        select(TransactionRecord).where(TransactionRecord.case_id == case_id)
    )
    for tx in tx_result.scalars().all():
        ws2.append([
            tx.trade_time.strftime("%Y-%m-%d %H:%M:%S"),
            float(tx.amount),
            tx.direction.value,
            tx.counterparty_name,
            tx.source_app.value,
            tx.order_no,
            # NOTE(review): both branches are empty strings — the yes/no marker
            # characters appear to have been lost; confirm the intended labels.
            "" if tx.is_duplicate else "",
            "" if tx.is_transit else "",
        ])
    # Random suffix avoids filename collisions across report versions.
    file_path = report_dir / f"report_{uuid.uuid4().hex[:8]}.xlsx"
    wb.save(file_path)
    return file_path
async def _gen_word(case_id: UUID, report_dir: Path, db: AsyncSession) -> Path:
    """Render confirmed assessments into a Word (.docx) summary report.

    Includes the confirmed total, the confirmed transaction count, and a
    four-column table of the confirmed assessments.
    """
    from docx import Document
    doc = Document()
    doc.add_heading("受害人被骗金额汇总报告", level=1)
    assessments_result = await db.execute(
        select(FraudAssessment).where(
            FraudAssessment.case_id == case_id,
            FraudAssessment.review_status == ReviewStatus.confirmed,
        )
    )
    confirmed = list(assessments_result.scalars().all())
    total = sum(float(a.assessed_amount) for a in confirmed)
    doc.add_paragraph(f"已确认被骗金额: ¥{total:,.2f}")
    doc.add_paragraph(f"已确认交易笔数: {len(confirmed)}")
    # Header row, then one row per confirmed assessment.
    table = doc.add_table(rows=1, cols=4)
    table.style = "Table Grid"
    hdr = table.rows[0].cells
    hdr[0].text = "交易时间"
    hdr[1].text = "金额(元)"
    hdr[2].text = "对方"
    hdr[3].text = "认定理由"
    for a in confirmed:
        # NOTE(review): one db.get per assessment (N+1); fine for small cases.
        tx = await db.get(TransactionRecord, a.transaction_id)
        row = table.add_row().cells
        # The guard handles a missing transaction by leaving those cells blank.
        row[0].text = tx.trade_time.strftime("%Y-%m-%d %H:%M") if tx else ""
        row[1].text = f"{float(a.assessed_amount):,.2f}"
        row[2].text = tx.counterparty_name if tx else ""
        row[3].text = a.reason[:80]
    file_path = report_dir / f"report_{uuid.uuid4().hex[:8]}.docx"
    doc.save(file_path)
    return file_path
async def _gen_pdf_placeholder(case_id: UUID, report_dir: Path) -> Path:
    """Write a stub PDF file; real rendering (weasyprint/reportlab) is a TODO."""
    out_file = report_dir / f"report_{uuid.uuid4().hex[:8]}.pdf"
    placeholder = "PDF report placeholder integrate weasyprint/reportlab for production."
    out_file.write_text(placeholder)
    return out_file