# backend/app/services/llm/claude_vision.py

"""Anthropic Claude Vision provider."""

import base64
import json
import re
from anthropic import AsyncAnthropic

from app.config import get_settings
from app.schemas.transaction import TransactionExtractItem
from app.services.llm.base import BaseLLMProvider
from app.prompts.extract_transaction import get_extract_messages
from app.services.llm.openai_vision import _parse_json_array


class ClaudeVisionProvider(BaseLLMProvider):
    """Transaction extractor backed by Anthropic's Claude Messages API.

    The prompt comes from ``get_extract_messages``: element 0 supplies the
    system prompt and element 1 supplies a list of ``text`` / ``image_url``
    content blocks. Those blocks are converted to Claude's content-block
    format before the request is sent.
    """

    @staticmethod
    def _detect_media_type(image_bytes: bytes) -> str:
        """Return the MIME type of *image_bytes* based on its magic bytes.

        Recognises JPEG, PNG, GIF and WebP. Anything else falls back to
        ``"image/jpeg"``, which is the value this provider previously sent
        unconditionally, so unrecognised input behaves as before.
        """
        magic_prefixes = (
            (b"\xff\xd8\xff", "image/jpeg"),
            (b"\x89PNG\r\n\x1a\n", "image/png"),
            (b"GIF87a", "image/gif"),
            (b"GIF89a", "image/gif"),
        )
        for prefix, media_type in magic_prefixes:
            if image_bytes.startswith(prefix):
                return media_type
        # WebP is a RIFF container: b"RIFF" + 4-byte size + b"WEBP".
        if image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Send an image to Claude and parse the transactions it reports.

        Args:
            image_bytes: Raw bytes of the image to analyse.

        Returns:
            Whatever ``_parse_json_array`` produces from the concatenated
            text of the response; ``"[]"`` is parsed when the response
            contains no text.

        Raises:
            ValueError: If ``settings.anthropic_api_key`` is empty.
        """
        settings = get_settings()
        if not settings.anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY is not set")

        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        messages = get_extract_messages(b64)
        # Declare the real image format instead of always claiming JPEG, so
        # PNG/GIF/WebP uploads are not mislabeled in the request.
        media_type = self._detect_media_type(image_bytes)

        # Claude API: user message with content block list
        content_blocks = []
        for block in messages[1]["content"]:
            if block["type"] == "text":
                content_blocks.append({"type": "text", "text": block["text"]})
            elif block["type"] == "image_url":
                # Claude expects base64 without data URL prefix
                content_blocks.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": block["image_url"]["url"].split(",", 1)[-1],
                    },
                })

        # A client is created per call; the context manager closes its HTTP
        # resources when the request finishes instead of waiting for GC.
        async with AsyncAnthropic(api_key=settings.anthropic_api_key) as client:
            response = await client.messages.create(
                model=settings.anthropic_model,
                max_tokens=4096,
                system=messages[0]["content"],
                messages=[{"role": "user", "content": content_blocks}],
            )

        # Only blocks exposing a ``text`` attribute contribute to the output.
        text = "".join(
            block.text for block in response.content if hasattr(block, "text")
        )
        return _parse_json_array(text or "[]")
first commit 2026-03-09 14:46:56 +08:00			`"""Anthropic Claude Vision provider."""`

			`import base64`
			`import json`
			`import re`
			`from anthropic import AsyncAnthropic`

			`from app.config import get_settings`
			`from app.schemas.transaction import TransactionExtractItem`
			`from app.services.llm.base import BaseLLMProvider`
			`from app.prompts.extract_transaction import get_extract_messages`
			`from app.services.llm.openai_vision import _parse_json_array`


			`class ClaudeVisionProvider(BaseLLMProvider):`
			`async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:`
			`settings = get_settings()`
			`if not settings.anthropic_api_key:`
			`raise ValueError("ANTHROPIC_API_KEY is not set")`
			`client = AsyncAnthropic(api_key=settings.anthropic_api_key)`
			`b64 = base64.standard_b64encode(image_bytes).decode("ascii")`
			`messages = get_extract_messages(b64)`
			`# Claude API: user message with content block list`
			`user_content = messages[1]["content"]`
			`content_blocks = []`
			`for block in user_content:`
			`if block["type"] == "text":`
			`content_blocks.append({"type": "text", "text": block["text"]})`
			`elif block["type"] == "image_url":`
			`# Claude expects base64 without data URL prefix`
			`content_blocks.append({`
			`"type": "image",`
			`"source": {`
			`"type": "base64",`
			`"media_type": "image/jpeg",`
			`"data": block["image_url"]["url"].split(",", 1)[-1],`
			`},`
			`})`
			`response = await client.messages.create(`
			`model=settings.anthropic_model,`
			`max_tokens=4096,`
			`system=messages[0]["content"],`
			`messages=[{"role": "user", "content": content_blocks}],`
			`)`
			`text = ""`
			`for block in response.content:`
			`if hasattr(block, "text"):`
			`text += block.text`
			`return _parse_json_array(text or "[]")`