Switch image analysis from Ollama to llama-server

Use the same llama-server (OpenAI-compatible API) for vision analysis that the main agent uses, falling back to OpenAI when llama-server is not configured. Send images as base64 data URLs in the standard OpenAI vision message format.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-04 08:06:51 -04:00
parent 0415610d64
commit 142fac3a84
1 changed file with 36 additions and 25 deletions
@@ -79,19 +79,25 @@ def describe_simba_image(input):
 async def analyze_user_image(file_bytes: bytes) -> str:
    """Analyze an image uploaded by a user and return a text description.
-    Uses Ollama vision model to describe the image contents.
+    Uses llama-server (OpenAI-compatible API) with vision support.
-    Works with JPEG, PNG, WebP bytes (HEIC should be converted before calling).
+    Falls back to OpenAI if llama-server is not configured.
    """
-    import tempfile
+    import base64
-    # Write to temp file since ollama client expects a file path
+    from openai import AsyncOpenAI
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
        f.write(file_bytes)
        temp_path = f.name
-    try:
+    llama_url = os.getenv("LLAMA_SERVER_URL")
-        response = client.chat(
+    if llama_url:
-            model="gemma3:4b",
+        aclient = AsyncOpenAI(base_url=llama_url, api_key="not-needed")
        model = os.getenv("LLAMA_MODEL_NAME", "llama-3.1-8b-instruct")
    else:
        aclient = AsyncOpenAI()
        model = "gpt-4o-mini"
    b64 = base64.b64encode(file_bytes).decode("utf-8")
    response = await aclient.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
@@ -99,14 +105,19 @@ async def analyze_user_image(file_bytes: bytes) -> str:
            },
            {
                "role": "user",
-                    "content": "Please describe this image in detail.",
+                "content": [
-                    "images": [temp_path],
+                    {"type": "text", "text": "Please describe this image in detail."},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{b64}",
                        },
                    },
                ],
            },
        ],
    )
-        return response["message"]["content"]
+    return response.choices[0].message.content
    finally:
        os.remove(temp_path)
 if __name__ == "__main__":