diff --git a/chunker.py b/chunker.py
index ec84bbe..abe6ae2 100644
--- a/chunker.py
+++ b/chunker.py
@@ -81,7 +81,7 @@ class Chunk:
 
 class Chunker:
     embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", ""),
+        url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
         model_name="mxbai-embed-large",
     )
 
diff --git a/cleaner.py b/cleaner.py
index 0ff3436..764251f 100644
--- a/cleaner.py
+++ b/cleaner.py
@@ -12,6 +12,9 @@ from request import PaperlessNGXService
 
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 parser = argparse.ArgumentParser(description="use llm to clean documents")
 parser.add_argument("document_id", type=str, help="questions about simba's health")
 
@@ -131,7 +134,7 @@ Someone will kill the innocent kittens if you don't extract the text exactly. So
 
 
 def summarize_pdf_image(filepaths: list[str]):
-    res = ollama.chat(
+    res = ollama_client.chat(
         model="gemma3:4b",
         messages=[
             {
diff --git a/docker-compose.yml b/docker-compose.yml
index cd05d9a..8263a14 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,28 +8,18 @@ services:
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
      - BASE_URL=${BASE_URL}
-      - OLLAMA_URL=${OLLAMA_URL:-http://ollama:11434}
+      - OLLAMA_URL=${OLLAMA_URL:-http://172.17.0.1:11434}
       - CHROMADB_PATH=/app/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
     volumes:
       - chromadb_data:/app/chromadb
-    depends_on:
-      - ollama
-    networks:
-      - raggr-network
-
-  ollama:
-    image: ollama/ollama:latest
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
     networks:
       - raggr-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 
 volumes:
   chromadb_data:
-  ollama_data:
 
 networks:
   raggr-network:
diff --git a/main.py b/main.py
index 1070113..1d13463 100644
--- a/main.py
+++ b/main.py
@@ -18,6 +18,9 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
 simba_docs = client.get_or_create_collection(name="simba_docs")
 feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
@@ -128,7 +131,7 @@ def consult_oracle(input: str, collection):
     # Generate
     print("Starting LLM generation")
     llm_start = time.time()
-    # output = ollama.generate(
+    # output = ollama_client.generate(
     #     model="gemma3n:e4b",
     #     prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
     # )
diff --git a/query.py b/query.py
index 453ed85..1e29be5 100644
--- a/query.py
+++ b/query.py
@@ -1,12 +1,16 @@
 import json
+import os
 from typing import Literal
 import datetime
 
-from ollama import chat, ChatResponse
+from ollama import chat, ChatResponse, Client
 from openai import OpenAI
 from pydantic import BaseModel, Field
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 
 # This uses inferred filters — which means using LLM to create the metadata filters
 
 
@@ -109,7 +113,7 @@ class QueryGenerator:
         print(response)
         query = json.loads(response.output_parsed.extracted_metadata_fields)
 
-        # response: ChatResponse = chat(
+        # response: ChatResponse = ollama_client.chat(
         #     model="gemma3n:e4b",
         #     messages=[
         #         {"role": "system", "content": PROMPT},