From 142fac3a845804c3476bb96d30e89f05d4375c6c Mon Sep 17 00:00:00 2001
From: Ryan Chen
Date: Sat, 4 Apr 2026 08:06:51 -0400
Subject: [PATCH] Switch image analysis from Ollama to llama-server

Use the same llama-server (OpenAI-compatible API) for vision analysis
that the main agent uses, with OpenAI fallback. Sends images as base64
in the standard OpenAI vision message format.

Co-Authored-By: Claude Opus 4.6
---
 utils/image_process.py | 61 +++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/utils/image_process.py b/utils/image_process.py
index 9bf14e8..3da666a 100644
--- a/utils/image_process.py
+++ b/utils/image_process.py
@@ -79,34 +79,45 @@ def describe_simba_image(input):
 async def analyze_user_image(file_bytes: bytes) -> str:
     """Analyze an image uploaded by a user and return a text description.
 
-    Uses Ollama vision model to describe the image contents.
-    Works with JPEG, PNG, WebP bytes (HEIC should be converted before calling).
+    Uses llama-server (OpenAI-compatible API) with vision support.
+    Falls back to OpenAI if llama-server is not configured.
     """
-    import tempfile
+    import base64
 
-    # Write to temp file since ollama client expects a file path
-    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
-        f.write(file_bytes)
-        temp_path = f.name
+    from openai import AsyncOpenAI
 
-    try:
-        response = client.chat(
-            model="gemma3:4b",
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a helpful image analyst. Describe what you see in the image in detail. Be thorough but concise.",
-                },
-                {
-                    "role": "user",
-                    "content": "Please describe this image in detail.",
-                    "images": [temp_path],
-                },
-            ],
-        )
-        return response["message"]["content"]
-    finally:
-        os.remove(temp_path)
+    llama_url = os.getenv("LLAMA_SERVER_URL")
+    if llama_url:
+        aclient = AsyncOpenAI(base_url=llama_url, api_key="not-needed")
+        model = os.getenv("LLAMA_MODEL_NAME", "llama-3.1-8b-instruct")
+    else:
+        aclient = AsyncOpenAI()
+        model = "gpt-4o-mini"
+
+    b64 = base64.b64encode(file_bytes).decode("utf-8")
+
+    response = await aclient.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a helpful image analyst. Describe what you see in the image in detail. Be thorough but concise.",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Please describe this image in detail."},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{b64}",
+                        },
+                    },
+                ],
+            },
+        ],
+    )
+    return response.choices[0].message.content
 
 
 if __name__ == "__main__":