Configure ollama to use external host instead of docker service

- Update all ollama clients to use a configurable OLLAMA_URL environment variable
- Remove the ollama service from docker-compose.yml in favor of an external ollama instance
- Configure docker-compose to reach the host's ollama via 172.17.0.1:11434 (Linux) or host.docker.internal (macOS/Windows)
- Add cross-platform compatibility with an extra_hosts mapping (see the sketch after this list)
- Update the embedding function's fallback URL for consistency
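A minimal sketch of the client-side pattern the touched Python files now share. The OLLAMA_URL variable and ollama.Client(host=...) usage come from this commit; the resolve_ollama_url helper is illustrative only, not repo code.

# Illustrative sketch, not part of the commit: every client reads OLLAMA_URL
# and falls back to a local Ollama instance when the variable is unset.
import os
import ollama

def resolve_ollama_url() -> str:
    # docker-compose injects OLLAMA_URL (e.g. http://172.17.0.1:11434);
    # on a bare host the localhost default applies.
    return os.getenv("OLLAMA_URL", "http://localhost:11434")

ollama_client = ollama.Client(host=resolve_ollama_url())

Because only the environment variable changes between environments, the same code runs unchanged on the host and inside the container.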

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 20:29:48 -04:00
parent a640ae5fed
commit 03b033e9a4
5 changed files with 18 additions and 18 deletions

View File

@@ -81,7 +81,7 @@ class Chunk:
 class Chunker:
     embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", ""),
+        url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
         model_name="mxbai-embed-large",
     )

View File

@@ -12,6 +12,9 @@ from request import PaperlessNGXService
 load_dotenv()
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
 parser = argparse.ArgumentParser(description="use llm to clean documents")
 parser.add_argument("document_id", type=str, help="questions about simba's health")
@@ -131,7 +134,7 @@ Someone will kill the innocent kittens if you don't extract the text exactly. So
 def summarize_pdf_image(filepaths: list[str]):
-    res = ollama.chat(
+    res = ollama_client.chat(
         model="gemma3:4b",
         messages=[
             {

View File

@@ -8,28 +8,18 @@ services:
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
       - BASE_URL=${BASE_URL}
-      - OLLAMA_URL=${OLLAMA_URL:-http://ollama:11434}
+      - OLLAMA_URL=${OLLAMA_URL:-http://172.17.0.1:11434}
       - CHROMADB_PATH=/app/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
     volumes:
       - chromadb_data:/app/chromadb
-    depends_on:
-      - ollama
     networks:
       - raggr-network
-  ollama:
-    image: ollama/ollama:latest
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    networks:
-      - raggr-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 volumes:
   chromadb_data:
-  ollama_data:
 networks:
   raggr-network:
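A hypothetical way to confirm the container can still reach ollama after this change (not part of the commit; assumes the stock Ollama HTTP server, which answers GET / with a short status string). Run it inside the application container:

# Hypothetical connectivity check, not repo code.
import os
import urllib.request

url = os.getenv("OLLAMA_URL", "http://172.17.0.1:11434")
with urllib.request.urlopen(url, timeout=5) as resp:
    # Expect HTTP 200 and a plain-text "Ollama is running" style message.
    print(resp.status, resp.read().decode())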

View File

@@ -18,6 +18,9 @@ from dotenv import load_dotenv
 load_dotenv()
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
 client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
 simba_docs = client.get_or_create_collection(name="simba_docs")
 feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
@@ -128,7 +131,7 @@ def consult_oracle(input: str, collection):
     # Generate
     print("Starting LLM generation")
     llm_start = time.time()
-    # output = ollama.generate(
+    # output = ollama_client.generate(
     #     model="gemma3n:e4b",
     #     prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
     # )

View File

@@ -1,12 +1,16 @@
 import json
 import os
 from typing import Literal
 import datetime
-from ollama import chat, ChatResponse
+from ollama import chat, ChatResponse, Client
 from openai import OpenAI
 from pydantic import BaseModel, Field
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
# This uses inferred filters — which means using LLM to create the metadata filters
@@ -109,7 +113,7 @@ class QueryGenerator:
         print(response)
         query = json.loads(response.output_parsed.extracted_metadata_fields)
-        # response: ChatResponse = chat(
+        # response: ChatResponse = ollama_client.chat(
         #     model="gemma3n:e4b",
         #     messages=[
         #         {"role": "system", "content": PROMPT},