first commit

This commit is contained in:
2026-03-09 14:46:56 +08:00
commit 62236eb80e
63 changed files with 6143 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
# LLM providers
from app.services.llm.base import BaseLLMProvider
from app.services.llm.router import get_llm_provider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
__all__ = [
"BaseLLMProvider",
"get_llm_provider",
"OpenAIVisionProvider",
"ClaudeVisionProvider",
"DeepSeekVisionProvider",
"CustomOpenAICompatibleProvider",
]

View File

@@ -0,0 +1,18 @@
"""Base LLM provider - abstract interface for vision extraction."""
from abc import ABC, abstractmethod
from app.schemas.transaction import TransactionExtractItem
class BaseLLMProvider(ABC):
"""Abstract base for LLM vision providers. Each provider implements extract_from_image."""
@abstractmethod
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
"""
Analyze a billing screenshot and return structured transaction list.
:param image_bytes: Raw image file content (PNG/JPEG)
:return: List of extracted transactions (may be empty or partial on failure)
"""
pass

View File

@@ -0,0 +1,49 @@
"""Anthropic Claude Vision provider."""
import base64
import json
import re
from anthropic import AsyncAnthropic
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class ClaudeVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.anthropic_api_key:
raise ValueError("ANTHROPIC_API_KEY is not set")
client = AsyncAnthropic(api_key=settings.anthropic_api_key)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
# Claude API: user message with content block list
user_content = messages[1]["content"]
content_blocks = []
for block in user_content:
if block["type"] == "text":
content_blocks.append({"type": "text", "text": block["text"]})
elif block["type"] == "image_url":
# Claude expects base64 without data URL prefix
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": block["image_url"]["url"].split(",", 1)[-1],
},
})
response = await client.messages.create(
model=settings.anthropic_model,
max_tokens=4096,
system=messages[0]["content"],
messages=[{"role": "user", "content": content_blocks}],
)
text = ""
for block in response.content:
if hasattr(block, "text"):
text += block.text
return _parse_json_array(text or "[]")

View File

@@ -0,0 +1,32 @@
"""Custom OpenAI-compatible vision provider."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class CustomOpenAICompatibleProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.custom_openai_api_key:
raise ValueError("CUSTOM_OPENAI_API_KEY is not set")
if not settings.custom_openai_base_url:
raise ValueError("CUSTOM_OPENAI_BASE_URL is not set")
client = AsyncOpenAI(
api_key=settings.custom_openai_api_key,
base_url=settings.custom_openai_base_url,
)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.custom_openai_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)

View File

@@ -0,0 +1,34 @@
"""DeepSeek Vision provider (uses OpenAI-compatible API)."""
import base64
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
# DeepSeek vision endpoint (OpenAI-compatible)
DEEPSEEK_BASE = "https://api.deepseek.com"
class DeepSeekVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.deepseek_api_key:
raise ValueError("DEEPSEEK_API_KEY is not set")
client = AsyncOpenAI(
api_key=settings.deepseek_api_key,
base_url=DEEPSEEK_BASE,
)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.deepseek_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)

View File

@@ -0,0 +1,56 @@
"""OpenAI Vision provider (GPT-4o)."""
import base64
import json
import re
from openai import AsyncOpenAI
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
class OpenAIVisionProvider(BaseLLMProvider):
async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
settings = get_settings()
if not settings.openai_api_key:
raise ValueError("OPENAI_API_KEY is not set")
client = AsyncOpenAI(api_key=settings.openai_api_key)
b64 = base64.standard_b64encode(image_bytes).decode("ascii")
messages = get_extract_messages(b64)
response = await client.chat.completions.create(
model=settings.openai_model,
messages=messages,
max_tokens=4096,
)
text = response.choices[0].message.content or "[]"
return _parse_json_array(text)
def _parse_json_array(text: str) -> list[TransactionExtractItem]:
"""Parse LLM response into list of TransactionExtractItem. Tolerates markdown and extra text."""
text = text.strip()
# Remove optional markdown code block
if text.startswith("```"):
text = re.sub(r"^```(?:json)?\s*", "", text)
text = re.sub(r"\s*```\s*$", "", text)
try:
data = json.loads(text)
except json.JSONDecodeError:
return []
if not isinstance(data, list):
return []
result: list[TransactionExtractItem] = []
for item in data:
if not isinstance(item, dict):
continue
try:
# Normalize transaction_time: allow string -> datetime
if isinstance(item.get("transaction_time"), str) and item["transaction_time"]:
from dateutil import parser as date_parser
item["transaction_time"] = date_parser.isoparse(item["transaction_time"])
result.append(TransactionExtractItem.model_validate(item))
except Exception:
continue
return result

View File

@@ -0,0 +1,22 @@
"""LLM provider factory - returns provider by config."""
from app.config import get_settings
from app.services.llm.base import BaseLLMProvider
from app.services.llm.openai_vision import OpenAIVisionProvider
from app.services.llm.claude_vision import ClaudeVisionProvider
from app.services.llm.deepseek_vision import DeepSeekVisionProvider
from app.services.llm.custom_openai_vision import CustomOpenAICompatibleProvider
def get_llm_provider() -> BaseLLMProvider:
settings = get_settings()
provider = (settings.llm_provider or "openai").lower()
if provider == "openai":
return OpenAIVisionProvider()
if provider == "anthropic":
return ClaudeVisionProvider()
if provider == "deepseek":
return DeepSeekVisionProvider()
if provider == "custom_openai":
return CustomOpenAICompatibleProvider()
return OpenAIVisionProvider()