Add image upload and vision analysis to Ask Simba chat

Users can now attach images in the web chat for Simba to analyze using Ollama's gemma3 vision model. Images are stored in Garage (S3-compatible) and displayed in chat history. Also fixes aerich migration config by extracting TORTOISE_CONFIG into a standalone config/db.py module, removing the stale aerich_config.py, and adding missing MODELS_STATE to migration 3. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-04 08:03:19 -04:00
parent ac9c821ec7
commit 0415610d64
17 changed files with 501 additions and 58 deletions
@@ -76,6 +76,39 @@ def describe_simba_image(input):
    return result


+async def analyze_user_image(file_bytes: bytes) -> str:
+    """Analyze an image uploaded by a user and return a text description.
+
+    The bytes are written to a temporary file, described by the Ollama
+    ``gemma3:4b`` vision model, and the temporary file is removed afterwards
+    whether or not the model call succeeds.
+    Works with JPEG, PNG, WebP bytes (HEIC should be converted before calling).
+
+    Args:
+        file_bytes: Encoded image data.
+
+    Returns:
+        The text found at ``response["message"]["content"]`` of the chat reply.
+    """
+    import asyncio
+    import tempfile
+
+    # Write to temp file since ollama client expects a file path.
+    # delete=False keeps the path valid after the handle is closed; the
+    # ``finally`` below is responsible for removing it.
+    tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
+    temp_path = tmp.name
+
+    try:
+        # Writing inside the try ensures a failed write does not leak the file.
+        with tmp:
+            tmp.write(file_bytes)
+
+        # client.chat is a blocking call; run it in a worker thread so this
+        # coroutine does not stall the event loop while the model is running.
+        response = await asyncio.to_thread(
+            client.chat,
+            model="gemma3:4b",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a helpful image analyst. Describe what you see in the image in detail. Be thorough but concise.",
+                },
+                {
+                    "role": "user",
+                    "content": "Please describe this image in detail.",
+                    "images": [temp_path],
+                },
+            ],
+        )
+        return response["message"]["content"]
+    finally:
+        os.remove(temp_path)
+
+
 if __name__ == "__main__":
    args = parser.parse_args()
    if args.filepath:
@@ -0,0 +1,62 @@
+import io
+import logging
+
+from PIL import Image
+from pillow_heif import register_heif_opener
+
+# Runs at import time: registers pillow_heif's opener with Pillow
+# (presumably what lets Image.open read HEIC/HEIF uploads — confirm in pillow_heif docs).
+register_heif_opener()
+
+# Configures the root logger at import time; this module logs via logging.info.
+logging.basicConfig(level=logging.INFO)
+
+# MIME types that process_image accepts; anything else is rejected.
+ALLOWED_TYPES = {"image/jpeg", "image/png", "image/webp", "image/heic", "image/heif"}
+# Longest allowed side in pixels; larger images are scaled down by process_image.
+MAX_DIMENSION = 1920
+
+
+class ImageValidationError(Exception):
+    """Raised when an uploaded image is rejected by validation."""
+
+
+def process_image(file_bytes: bytes, content_type: str) -> tuple[bytes, str]:
+    """Validate, resize, and strip EXIF from an uploaded image.
+
+    Steps, in order: check ``content_type`` against ``ALLOWED_TYPES``, decode
+    the bytes, apply the EXIF orientation to the pixels, scale the image so
+    its longest side is at most ``MAX_DIMENSION``, rebuild the image from raw
+    pixel data so no metadata is carried over, and re-encode it.
+
+    Args:
+        file_bytes: Encoded image data as uploaded.
+        content_type: MIME type declared for the upload.
+
+    Returns:
+        Processed bytes and the output content type (always image/jpeg or
+        image/png or image/webp). HEIC/HEIF input is converted to JPEG.
+
+    Raises:
+        ImageValidationError: If ``content_type`` is not allowed or the bytes
+            cannot be decoded as an image.
+    """
+    from PIL import ImageOps
+
+    if content_type not in ALLOWED_TYPES:
+        raise ImageValidationError(
+            f"Unsupported image type: {content_type}. "
+            f"Allowed: JPEG, PNG, WebP, HEIC"
+        )
+
+    try:
+        img = Image.open(io.BytesIO(file_bytes))
+        # Image.open is lazy; force decoding so corrupt data fails here.
+        img.load()
+        # Bake the EXIF orientation into the pixels before the tag is
+        # discarded, otherwise rotated phone photos come out sideways.
+        img = ImageOps.exif_transpose(img)
+    except (OSError, SyntaxError, ValueError, Image.DecompressionBombError) as exc:
+        raise ImageValidationError("Could not decode image data") from exc
+
+    # Convert HEIC/HEIF to JPEG; otherwise keep original format
+    output_format, output_content_type = {
+        "image/png": ("PNG", "image/png"),
+        "image/webp": ("WEBP", "image/webp"),
+    }.get(content_type, ("JPEG", "image/jpeg"))
+
+    # Palette images keep their colours in a side table that a raw pixel copy
+    # would drop, so expand them to true colour first.
+    if img.mode in ("P", "PA"):
+        has_alpha = img.mode == "PA" or "transparency" in img.info
+        img = img.convert("RGBA" if has_alpha else "RGB")
+
+    # JPEG cannot store alpha or other exotic modes; saving would raise.
+    if output_format == "JPEG" and img.mode not in ("RGB", "L", "CMYK"):
+        img = img.convert("RGB")
+
+    # Resize if too large
+    width, height = img.size
+    longest_side = max(width, height)
+    if longest_side > MAX_DIMENSION:
+        ratio = MAX_DIMENSION / longest_side
+        # Clamp to 1 so extreme aspect ratios never produce a zero dimension.
+        new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
+        img = img.resize(new_size, Image.LANCZOS)
+        logging.info(
+            f"Resized image from {width}x{height} to {new_size[0]}x{new_size[1]}"
+        )
+
+    # Strip EXIF by copying pixel data to a new image; the raw-bytes round
+    # trip avoids building a Python list with one entry per pixel.
+    clean_img = Image.frombytes(img.mode, img.size, img.tobytes())
+
+    buf = io.BytesIO()
+    clean_img.save(buf, format=output_format, quality=85)
+    return buf.getvalue(), output_content_type
@@ -0,0 +1,53 @@
+import os
+import logging
+
+import aioboto3
+from dotenv import load_dotenv
+
+# Runs at import time, before the os.getenv calls below
+# (python-dotenv; presumably loads variables from a .env file — confirm).
+load_dotenv()
+
+# Configures the root logger at import time; this module logs via logging.info.
+logging.basicConfig(level=logging.INFO)
+
+# S3 connection settings read from the environment once, at import time.
+# Endpoint and credentials have no default and are None when unset.
+S3_ENDPOINT_URL = os.getenv("S3_ENDPOINT_URL")
+S3_ACCESS_KEY_ID = os.getenv("S3_ACCESS_KEY_ID")
+S3_SECRET_ACCESS_KEY = os.getenv("S3_SECRET_ACCESS_KEY")
+# Bucket and region fall back to the defaults given here.
+S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME", "asksimba-images")
+S3_REGION = os.getenv("S3_REGION", "garage")
+
+# Single module-level session; _get_client creates a client from it per call.
+session = aioboto3.Session()
+
+
+def _get_client():
+    """Create an S3 client from the module-level connection settings.
+
+    The returned object is used as an async context manager by the callers
+    in this module (``async with _get_client() as client``).
+    """
+    connection_settings = {
+        "endpoint_url": S3_ENDPOINT_URL,
+        "aws_access_key_id": S3_ACCESS_KEY_ID,
+        "aws_secret_access_key": S3_SECRET_ACCESS_KEY,
+        "region_name": S3_REGION,
+    }
+    return session.client("s3", **connection_settings)
+
+
+async def upload_image(file_bytes: bytes, key: str, content_type: str) -> str:
+    """Store ``file_bytes`` in the configured bucket under ``key``.
+
+    The object is written with ``content_type`` as its ContentType. Returns
+    the same ``key`` that was passed in.
+    """
+    put_request = {
+        "Bucket": S3_BUCKET_NAME,
+        "Key": key,
+        "Body": file_bytes,
+        "ContentType": content_type,
+    }
+    async with _get_client() as s3:
+        await s3.put_object(**put_request)
+
+    # Logged only after the client context has closed without error.
+    logging.info(f"Uploaded image to S3: {key}")
+    return key
+
+
+async def get_image(key: str) -> tuple[bytes, str]:
+    """Fetch the object stored under ``key`` from the configured bucket.
+
+    Returns the object's bytes and its ContentType, using ``image/jpeg``
+    when the response carries no ContentType.
+    """
+    async with _get_client() as s3:
+        obj = await s3.get_object(Bucket=S3_BUCKET_NAME, Key=key)
+        mime_type = obj.get("ContentType", "image/jpeg")
+        # The body stream must be read while the client is still open.
+        payload = await obj["Body"].read()
+    return payload, mime_type
+
+
+async def delete_image(key: str) -> None:
+    """Delete the object stored under ``key`` from the configured bucket."""
+    target = {"Bucket": S3_BUCKET_NAME, "Key": key}
+    async with _get_client() as s3:
+        await s3.delete_object(**target)
+
+    # Logged only after the client context has closed without error.
+    logging.info(f"Deleted image from S3: {key}")