Users can now attach images in the web chat for Simba to analyze using Ollama's gemma3 vision model. Images are stored in Garage (S3-compatible) and displayed in chat history. Also fixes aerich migration config by extracting TORTOISE_CONFIG into a standalone config/db.py module, removing the stale aerich_config.py, and adding missing MODELS_STATE to migration 3. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
117 lines
3.7 KiB
Python
117 lines
3.7 KiB
Python
from ollama import Client
|
|
import argparse
|
|
import os
|
|
import logging
|
|
from PIL import Image, ExifTags
|
|
from pillow_heif import register_heif_opener
|
|
from pydantic import BaseModel
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
# Load variables from a .env file into the process environment before the
# os.getenv lookup further down (OLLAMA_HOST) is evaluated.
load_dotenv()
|
|
|
|
# Register pillow_heif's opener with Pillow so Image.open can read the
# HEIC/HEIF files handled in describe_simba_image.
register_heif_opener()
|
|
|
|
# Configure the root logger at INFO so the progress messages in this module
# are emitted.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
# Command-line interface: a single required positional argument, the path of
# the image to describe. The description/epilog replace the placeholder text
# that had been copied verbatim from the argparse documentation.
parser = argparse.ArgumentParser(
    prog="SimbaImageProcessor",
    description="Describe a photo of Simba using an Ollama vision model.",
    epilog=(
        "HEIC/HEIF inputs are converted to a JPEG saved next to the original "
        "file. Set OLLAMA_HOST to use a non-default Ollama server."
    ),
)
parser.add_argument("filepath", help="path to the image file to analyze")
|
|
|
|
# Shared synchronous Ollama client. The server URL is read from OLLAMA_HOST
# and falls back to http://localhost:11434 when the variable is unset.
client = Client(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
|
|
|
|
|
|
# Structured reply requested from the vision model; its JSON schema is passed
# to the chat call as the response format and the reply is validated against it.
# NOTE: documented with comments rather than a class docstring on purpose --
# pydantic copies a model's docstring into the generated JSON schema, which
# would change what is sent to the model.
class SimbaImageDescription(BaseModel):
    # The date of the image, as a string (the prompt asks the model to put
    # "the date" here; no particular format is enforced).
    image_date: str
    # The model's notes about the image.
    description: str
|
|
|
|
|
|
def describe_simba_image(input):
    """Describe a photo of Simba using the Ollama vision model.

    The image's EXIF tags are read and embedded in the prompt, the image is
    sent to the ``gemma3:4b`` model through the module-level ``client``, and
    the JSON reply is validated into a ``SimbaImageDescription``.

    HEIC/HEIF files (detected by file extension, case-insensitively) are
    first converted to a JPEG written next to the original, with the
    extension replaced by ``.jpg``; that JPEG is what gets sent to the model.

    Args:
        input: Filesystem path of the image to analyze. The name shadows the
            builtin but is kept because callers pass it by keyword.

    Returns:
        The validated ``SimbaImageDescription``.
    """
    logging.info("Opening image of Simba ...")

    # Split on the real extension only. Splitting the whole path on "." would
    # mangle paths such as "./simba.heic" or "photos.2024/simba.heic".
    root, extension = os.path.splitext(input)
    is_heif = extension.lower() in (".heic", ".heif")

    # Context manager so the underlying file handle is always released.
    with Image.open(input) as img:
        if is_heif:
            new_filepath = root + ".jpg"
            img.save(new_filepath, "JPEG")
            # From here on the converted JPEG is the file sent to the model.
            input = new_filepath

        logging.info("Extracting EXIF...")
        # Keep only tags Pillow knows a name for, keyed by that name.
        exif = {
            ExifTags.TAGS[k]: v for k, v in img.getexif().items() if k in ExifTags.TAGS
        }

    # MakerNote is dropped before the EXIF dict is logged and put in the prompt.
    exif.pop("MakerNote", None)

    logging.info(exif)

    prompt = f"Simba is an orange cat belonging to Ryan Chen. In 2025, they lived in New York. In 2024, they lived in California. Analyze the following image and tell me what Simba seems to be doing. Be extremely descriptive about Simba, things in the background, and the setting of the image. I will also include the EXIF data of the image, please use it to help you determine information about Simba. EXIF: {exif}. Put the notes in the description field and the date in the image_date field."

    logging.info("Sending info to Ollama ...")
    response = client.chat(
        model="gemma3:4b",
        messages=[
            {
                "role": "system",
                "content": "you are a very shrewd and descriptive note taker. all of your responses will be formatted like notes in bullet points. be very descriptive. do not leave a single thing out.",
            },
            {"role": "user", "content": prompt, "images": [input]},
        ],
        # Constrain the reply to the SimbaImageDescription JSON schema.
        format=SimbaImageDescription.model_json_schema(),
    )

    result = SimbaImageDescription.model_validate_json(response["message"]["content"])

    return result
|
|
|
|
|
|
async def analyze_user_image(file_bytes: bytes) -> str:
    """Analyze an image uploaded by a user and return a text description.

    Uses the Ollama vision model (``gemma3:4b``) to describe the image
    contents. Works with JPEG, PNG, WebP bytes (HEIC should be converted
    before calling).

    Args:
        file_bytes: Raw bytes of the uploaded image.

    Returns:
        The text content of the model's reply.
    """
    import asyncio
    import functools
    import tempfile

    # The image is handed to the Ollama client as a file path, so the bytes
    # are staged in a temporary file that is removed once the call finishes.
    staged = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
    temp_path = staged.name

    try:
        # Writing happens inside the try so a failed write still cleans up
        # the temporary file.
        with staged:
            staged.write(file_bytes)

        request = functools.partial(
            client.chat,
            model="gemma3:4b",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful image analyst. Describe what you see in the image in detail. Be thorough but concise.",
                },
                {
                    "role": "user",
                    "content": "Please describe this image in detail.",
                    "images": [temp_path],
                },
            ],
        )
        # client.chat is a synchronous call; run it in the default executor
        # so the event loop is not blocked while the model is working.
        response = await asyncio.get_running_loop().run_in_executor(None, request)
        return response["message"]["content"]
    finally:
        os.remove(temp_path)
|
|
|
|
|
|
# Script entry point: describe the image named on the command line.
if __name__ == "__main__":
    args = parser.parse_args()
    if args.filepath:
        # The original had a bare `logging.info` here (an attribute reference
        # that does nothing); it is now an actual log call.
        logging.info("Describing %s ...", args.filepath)
        result = describe_simba_image(input=args.filepath)
        # Surface the description; previously the return value was discarded.
        logging.info(result)
|