50 lines
1.9 KiB
Python
50 lines
1.9 KiB
Python
|
|
"""Anthropic Claude Vision provider."""
|
||
|
|
|
||
|
|
import base64
|
||
|
|
import json
|
||
|
|
import re
|
||
|
|
from anthropic import AsyncAnthropic
|
||
|
|
|
||
|
|
from app.config import get_settings
|
||
|
|
from app.schemas.transaction import TransactionExtractItem
|
||
|
|
from app.services.llm.base import BaseLLMProvider
|
||
|
|
from app.prompts.extract_transaction import get_extract_messages
|
||
|
|
from app.services.llm.openai_vision import _parse_json_array
|
||
|
|
|
||
|
|
|
||
|
|
class ClaudeVisionProvider(BaseLLMProvider):
    """Extract transactions from an image using the Anthropic Claude Messages API.

    The prompt comes from ``get_extract_messages``, whose user message is a
    list of ``text`` / ``image_url`` content blocks. This provider converts
    those into the ``text`` / ``image`` (base64 source) blocks it passes to
    ``client.messages.create``.
    """

    # Media types forwarded unchanged; anything else falls back to the default.
    # NOTE(review): list taken from Anthropic's vision documentation for
    # base64 image sources -- confirm against the API version in use.
    _SUPPORTED_MEDIA_TYPES = ("image/jpeg", "image/png", "image/gif", "image/webp")

    # Used when the format cannot be determined. This is the value that was
    # previously sent unconditionally, so undetectable inputs behave as before.
    _DEFAULT_MEDIA_TYPE = "image/jpeg"

    @staticmethod
    def _sniff_media_type(payload: str) -> str:
        """Return the media type implied by the payload's leading bytes.

        Args:
            payload: Base64 text of the image (no ``data:`` prefix).

        Returns:
            One of the recognised image media types, or ``""`` when the
            payload cannot be decoded or matches no known signature.
        """
        try:
            # 16 base64 characters decode to 12 bytes, which covers the
            # longest signature checked below (RIFF....WEBP).
            head = base64.b64decode(payload[:16])
        except ValueError:
            # binascii.Error (bad padding/alphabet) is a ValueError subclass.
            return ""
        if head.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if head.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if head.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
            return "image/webp"
        return ""

    @classmethod
    def _to_image_block(cls, url: str) -> dict:
        """Convert an ``image_url`` URL string into a Claude ``image`` block.

        The base64 payload is everything after the first comma (or the whole
        string when there is no comma). The media type is taken, in order of
        preference, from the payload's magic bytes, from the ``data:<type>``
        prefix, and finally from ``_DEFAULT_MEDIA_TYPE``.

        Args:
            url: Value of ``block["image_url"]["url"]``.

        Returns:
            A dict with ``type == "image"`` and a base64 ``source``.
        """
        header, comma, payload = url.partition(",")
        if not comma:
            # No data-URL prefix at all: the whole string is the payload.
            header, payload = "", url

        declared = ""
        if header.lower().startswith("data:"):
            # "data:image/png;base64" -> "image/png"
            declared = header[len("data:"):].split(";", 1)[0].strip().lower()

        # Trust the actual bytes over the declared type: a prompt builder may
        # label every image as JPEG regardless of what was uploaded.
        media_type = cls._sniff_media_type(payload) or declared
        if media_type not in cls._SUPPORTED_MEDIA_TYPES:
            media_type = cls._DEFAULT_MEDIA_TYPE

        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": payload,
            },
        }

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Send the image to Claude and parse the reply into transaction items.

        Args:
            image_bytes: Raw bytes of the image to analyse.

        Returns:
            The result of ``_parse_json_array`` applied to the concatenated
            text of the response; ``"[]"`` is parsed when the response
            contains no text.

        Raises:
            ValueError: If ``settings.anthropic_api_key`` is empty.
        """
        settings = get_settings()
        if not settings.anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY is not set")

        client = AsyncAnthropic(api_key=settings.anthropic_api_key)
        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        messages = get_extract_messages(b64)

        # messages[0] is used as the system prompt and messages[1] as the user
        # turn; the system text is passed via the separate ``system`` argument.
        content_blocks = []
        for block in messages[1]["content"]:
            if block["type"] == "text":
                content_blocks.append({"type": "text", "text": block["text"]})
            elif block["type"] == "image_url":
                # Claude expects bare base64 plus an explicit media type
                # instead of a data URL.
                content_blocks.append(self._to_image_block(block["image_url"]["url"]))

        response = await client.messages.create(
            model=settings.anthropic_model,
            max_tokens=4096,
            system=messages[0]["content"],
            messages=[{"role": "user", "content": content_blocks}],
        )

        # Only blocks exposing a ``text`` attribute contribute to the output.
        text = "".join(
            block.text for block in response.content if hasattr(block, "text")
        )
        return _parse_json_array(text or "[]")
|