84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
"""Transaction deduplication and matching engine.

Multi-layer strategy:
  1. Exact order_no match
  2. Amount + time-window + account-tail match
  3. Fuzzy text similarity (placeholder for LLM-assisted matching)
"""
|
|
from uuid import UUID
|
|
from datetime import timedelta
|
|
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.models.transaction import TransactionRecord
|
|
from app.models.transaction_cluster import TransactionCluster
|
|
from app.repositories.transaction_repo import TransactionRepository
|
|
from app.rules.dedup_rules import is_duplicate_pair
|
|
from app.rules.transit_rules import is_self_transfer
|
|
|
|
|
|
async def run_matching(case_id: UUID, self_accounts: list[str], db: AsyncSession) -> None:
    """Execute the full dedup + transit-marking pipeline for a case.

    Loads every transaction of the case, clears the ``is_duplicate``,
    ``is_transit`` and ``cluster_id`` fields, groups duplicates into
    ``TransactionCluster`` rows, and finally marks the remaining
    non-duplicate transactions that ``is_self_transfer`` accepts as transit.

    Args:
        case_id: Case whose transactions are processed.
        self_accounts: Passed through unchanged to ``is_self_transfer``.
        db: Session used to load transactions, add clusters and flush.
            This function only flushes; it never commits.

    Returns:
        None. Results are written onto the loaded transaction objects and
        the newly added cluster rows.
    """
    repo = TransactionRepository(db)
    transactions = await repo.get_all_by_case(case_id)
    if not transactions:
        return

    # Reset flags so a re-run starts from a clean slate.
    # NOTE(review): clusters created by an earlier run are not deleted here;
    # confirm they are cleaned up elsewhere, otherwise they are left orphaned.
    for tx in transactions:
        tx.is_duplicate = False
        tx.is_transit = False
        tx.cluster_id = None

    # ── Dedup ──
    # Each not-yet-matched transaction anchors a group. Later transactions are
    # compared against the anchor only, so grouping is not transitive.
    matched: set[UUID] = set()

    for i, tx_a in enumerate(transactions):
        if tx_a.id in matched:
            continue

        group = [tx_a]
        for tx_b in transactions[i + 1:]:
            if tx_b.id in matched:
                continue
            if is_duplicate_pair(tx_a, tx_b):
                group.append(tx_b)
                matched.add(tx_b.id)

        if len(group) < 2:
            continue

        # The most confident record becomes the primary. The (is-set, value)
        # key ranks a missing confidence below every real value instead of
        # letting max() raise TypeError on a None comparison.
        # NOTE(review): assumes confidence may be None — confirm on the model.
        primary = max(
            group,
            key=lambda t: (t.confidence is not None, t.confidence),
        )
        cluster = TransactionCluster(
            case_id=case_id,
            primary_tx_id=primary.id,
            match_reason=_match_reason(primary, group),
        )
        db.add(cluster)
        # Flush before reading cluster.id — presumably the id is only
        # populated once the row has been sent to the database.
        await db.flush()

        for tx in group:
            tx.cluster_id = cluster.id
            if tx.id != primary.id:
                tx.is_duplicate = True

    # ── Transit marking ──
    # Duplicates are skipped, so within a cluster only the primary record
    # can be flagged as transit.
    for tx in transactions:
        if tx.is_duplicate:
            continue
        if is_self_transfer(tx, self_accounts):
            tx.is_transit = True

    await db.flush()
|
|
|
|
|
|
def _match_reason(primary: TransactionRecord, group: list[TransactionRecord]) -> str:
|
|
reasons: list[str] = []
|
|
orders = {tx.order_no for tx in group if tx.order_no}
|
|
if len(orders) == 1:
|
|
reasons.append("订单号一致")
|
|
amounts = {float(tx.amount) for tx in group}
|
|
if len(amounts) == 1:
|
|
reasons.append("金额一致")
|
|
return "; ".join(reasons) if reasons else "时间和金额近似"
|