Files
fund-tracer/backend/app/services/llm/claude_vision.py
2026-03-09 14:46:56 +08:00

50 lines
1.9 KiB
Python

"""Anthropic Claude Vision provider."""
import base64
import json
import re
from anthropic import AsyncAnthropic
from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array
class ClaudeVisionProvider(BaseLLMProvider):
    """LLM provider that sends an image to the Anthropic Messages API.

    The prompt is built by ``get_extract_messages`` and its OpenAI-style
    ``image_url`` blocks are converted into Claude ``image`` blocks before
    the request is sent.
    """

    @staticmethod
    def _resolve_media_type(image_bytes: bytes, data_url: str) -> str:
        """Return the media type to declare for the image block.

        The magic bytes of ``image_bytes`` are checked first, because the
        declared media type should describe the actual image data. If the
        bytes are not recognised, the type declared in the ``data:`` URL
        header is used; if that is missing too, ``image/jpeg`` is returned
        (the value this provider always sent previously).
        """
        if image_bytes.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if image_bytes.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            return "image/webp"

        # Unknown signature: trust a "data:<mime>;base64,<payload>" header.
        if data_url.startswith("data:") and "," in data_url:
            header = data_url.split(",", 1)[0]
            declared = header[len("data:"):].split(";", 1)[0].strip()
            if declared.startswith("image/"):
                return declared
        return "image/jpeg"

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Ask Claude to extract transactions from an image.

        Args:
            image_bytes: Raw bytes of the image to analyse.

        Returns:
            Whatever ``_parse_json_array`` produces from the concatenated
            text blocks of the response (``"[]"`` is parsed when the
            response contains no text).

        Raises:
            ValueError: If ``settings.anthropic_api_key`` is empty.
        """
        settings = get_settings()
        if not settings.anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY is not set")

        client = AsyncAnthropic(api_key=settings.anthropic_api_key)
        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        messages = get_extract_messages(b64)

        # messages[0] is used as the system prompt and messages[1] as the
        # user turn; the user content is iterated as a list of typed blocks.
        user_content = messages[1]["content"]
        content_blocks = []
        for block in user_content:
            if block["type"] == "text":
                content_blocks.append({"type": "text", "text": block["text"]})
            elif block["type"] == "image_url":
                url = block["image_url"]["url"]
                content_blocks.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        # Declare the real format instead of always JPEG.
                        "media_type": self._resolve_media_type(image_bytes, url),
                        # Claude expects base64 without the data URL prefix.
                        "data": url.split(",", 1)[-1],
                    },
                })

        response = await client.messages.create(
            model=settings.anthropic_model,
            max_tokens=4096,
            system=messages[0]["content"],
            messages=[{"role": "user", "content": content_blocks}],
        )

        # Only blocks exposing a ``text`` attribute contribute to the output.
        text = "".join(
            block.text for block in response.content if hasattr(block, "text")
        )
        return _parse_json_array(text or "[]")