fix/ynab-transaction-limit #16

Merged
ryan merged 3 commits from fix/ynab-transaction-limit into main 2026-04-04 08:14:30 -04:00
Showing only changes of commit 142fac3a84 - Show all commits

View File

@@ -79,34 +79,45 @@ def describe_simba_image(input):
async def analyze_user_image(file_bytes: bytes) -> str:
    """Analyze an image uploaded by a user and return a text description.

    Uses llama-server (OpenAI-compatible API) with vision support.
    Falls back to OpenAI if llama-server is not configured.

    Args:
        file_bytes: Raw image bytes. JPEG, PNG, and WebP are supported;
            HEIC should be converted before calling.

    Returns:
        A textual description of the image contents.
    """
    import base64

    from openai import AsyncOpenAI

    llama_url = os.getenv("LLAMA_SERVER_URL")
    if llama_url:
        # llama-server speaks the OpenAI wire protocol but ignores the API
        # key, so any placeholder value satisfies the client constructor.
        aclient = AsyncOpenAI(base_url=llama_url, api_key="not-needed")
        model = os.getenv("LLAMA_MODEL_NAME", "llama-3.1-8b-instruct")
    else:
        # No local server configured: fall back to the hosted OpenAI API
        # (credentials come from the standard OPENAI_API_KEY env var).
        aclient = AsyncOpenAI()
        model = "gpt-4o-mini"

    # The OpenAI vision API takes images inline as base64 data URLs, so no
    # temp file on disk is needed.
    b64 = base64.b64encode(file_bytes).decode("utf-8")
    response = await aclient.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful image analyst. Describe what you see in the image in detail. Be thorough but concise.",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please describe this image in detail."},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{b64}",
                        },
                    },
                ],
            },
        ],
    )
    return response.choices[0].message.content
if __name__ == "__main__":