35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
"""DeepSeek Vision provider (uses OpenAI-compatible API)."""
|
|
|
|
import base64
|
|
from openai import AsyncOpenAI
|
|
|
|
from app.config import get_settings
|
|
from app.schemas.transaction import TransactionExtractItem
|
|
from app.services.llm.base import BaseLLMProvider
|
|
from app.prompts.extract_transaction import get_extract_messages
|
|
from app.services.llm.openai_vision import _parse_json_array
|
|
|
|
|
|
# DeepSeek vision endpoint (OpenAI-compatible)
|
|
DEEPSEEK_BASE = "https://api.deepseek.com"  # passed as base_url to AsyncOpenAI below
|
|
|
|
|
|
class DeepSeekVisionProvider(BaseLLMProvider):
    """LLM provider that extracts transactions via DeepSeek's OpenAI-compatible API."""

    async def extract_from_image(self, image_bytes: bytes) -> list[TransactionExtractItem]:
        """Send an image to DeepSeek and parse the reply into transaction items.

        Args:
            image_bytes: Raw image bytes; they are base64-encoded (standard
                alphabet, ASCII text) before being passed to
                ``get_extract_messages``.

        Returns:
            The result of ``_parse_json_array`` applied to the model's reply
            text. A reply with no choices or with empty content is parsed as
            the literal ``"[]"``.

        Raises:
            ValueError: If ``settings.deepseek_api_key`` is unset or empty.
        """
        settings = get_settings()
        if not settings.deepseek_api_key:
            raise ValueError("DEEPSEEK_API_KEY is not set")

        b64 = base64.standard_b64encode(image_bytes).decode("ascii")
        messages = get_extract_messages(b64)

        # A client is created per call, so close it deterministically instead
        # of leaving its HTTP connection pool to be reclaimed at GC time.
        async with AsyncOpenAI(
            api_key=settings.deepseek_api_key,
            base_url=DEEPSEEK_BASE,
        ) as client:
            response = await client.chat.completions.create(
                model=settings.deepseek_model,
                messages=messages,
                max_tokens=4096,
            )

        # Treat an empty choices list the same way as missing content rather
        # than failing with IndexError.
        choices = response.choices
        content = choices[0].message.content if choices else None
        text = content or "[]"
        return _parse_json_array(text)
|