"""Anthropic Claude Vision provider.""" import base64 import json import re from anthropic import AsyncAnthropic from app.config import get_settings from app.schemas.transaction import TransactionExtractItem from app.services.llm.base import BaseLLMProvider from app.prompts.extract_transaction import get_extract_messages from app.services.llm.openai_vision import _parse_json_array class ClaudeVisionProvider(BaseLLMProvider): async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]: settings = get_settings() if not settings.anthropic_api_key: raise ValueError("ANTHROPIC_API_KEY is not set") client = AsyncAnthropic(api_key=settings.anthropic_api_key) b64 = base64.standard_b64encode(image_bytes).decode("ascii") messages = get_extract_messages(b64) # Claude API: user message with content block list user_content = messages[1]["content"] content_blocks = [] for block in user_content: if block["type"] == "text": content_blocks.append({"type": "text", "text": block["text"]}) elif block["type"] == "image_url": # Claude expects base64 without data URL prefix content_blocks.append({ "type": "image", "source": { "type": "base64", "media_type": "image/jpeg", "data": block["image_url"]["url"].split(",", 1)[-1], }, }) response = await client.messages.create( model=settings.anthropic_model, max_tokens=4096, system=messages[0]["content"], messages=[{"role": "user", "content": content_blocks}], ) text = "" for block in response.content: if hasattr(block, "text"): text += block.text return _parse_json_array(text or "[]")