Configure ollama to use external host instead of docker service

- Update all ollama clients to use configurable OLLAMA_URL environment variable
- Remove ollama service from docker-compose.yml to use external ollama instance
- Configure docker-compose to connect to host ollama via 172.17.0.1:11434 (Linux) or host.docker.internal (macOS/Windows)
- Add cross-platform compatibility with extra_hosts mapping
- Update embedding function fallback URL for consistency

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 20:29:48 -04:00
parent a640ae5fed
commit 03b033e9a4
5 changed files with 18 additions and 18 deletions
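A minimal sketch of the client pattern these changes standardize on, assuming the ollama Python package and an Ollama instance reachable at the configured URL; the model name is taken from the diffs below and the rest is illustrative:

import os

import ollama

# Every client reads OLLAMA_URL from the environment and falls back to a local
# instance, so the same code works inside Docker (pointing at the host) and on
# a developer machine.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
ollama_client = ollama.Client(host=OLLAMA_URL)

# Example call; gemma3:4b is one of the models referenced in this commit.
response = ollama_client.chat(
    model="gemma3:4b",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(response["message"]["content"])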

View File

@@ -81,7 +81,7 @@ class Chunk:
 class Chunker:
     embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", ""),
+        url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
         model_name="mxbai-embed-large",
     )

View File

@@ -12,6 +12,9 @@ from request import PaperlessNGXService
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 parser = argparse.ArgumentParser(description="use llm to clean documents")
 parser.add_argument("document_id", type=str, help="questions about simba's health")
@@ -131,7 +134,7 @@ Someone will kill the innocent kittens if you don't extract the text exactly. So
 def summarize_pdf_image(filepaths: list[str]):
-    res = ollama.chat(
+    res = ollama_client.chat(
         model="gemma3:4b",
         messages=[
             {

View File

@@ -8,28 +8,18 @@ services:
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
       - BASE_URL=${BASE_URL}
-      - OLLAMA_URL=${OLLAMA_URL:-http://ollama:11434}
+      - OLLAMA_URL=${OLLAMA_URL:-http://172.17.0.1:11434}
       - CHROMADB_PATH=/app/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
     volumes:
       - chromadb_data:/app/chromadb
-    depends_on:
-      - ollama
-    networks:
-      - raggr-network
-
-  ollama:
-    image: ollama/ollama:latest
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
     networks:
       - raggr-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 
 volumes:
   chromadb_data:
-  ollama_data:
 
 networks:
   raggr-network:
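On Linux, 172.17.0.1 is the default docker0 bridge gateway, and the extra_hosts entry maps host.docker.internal to the host via Docker's host-gateway alias, so either name reaches the external Ollama instance. A hypothetical startup check, not part of this commit, that a container could run to confirm the connection (assumes the ollama Python package):

import os

import ollama

# OLLAMA_URL is injected by docker-compose; the fallback mirrors the Linux
# bridge-gateway default used above. host.docker.internal resolves the same
# way because of the extra_hosts mapping.
url = os.getenv("OLLAMA_URL", "http://172.17.0.1:11434")
try:
    models = ollama.Client(host=url).list()["models"]
    print(f"Ollama reachable at {url}; {len(models)} model(s) available")
except Exception as exc:
    raise SystemExit(f"Cannot reach Ollama at {url}: {exc}")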

View File

@@ -18,6 +18,9 @@ from dotenv import load_dotenv
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
 simba_docs = client.get_or_create_collection(name="simba_docs")
 feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
@@ -128,7 +131,7 @@ def consult_oracle(input: str, collection):
     # Generate
     print("Starting LLM generation")
     llm_start = time.time()
-    # output = ollama.generate(
+    # output = ollama_client.generate(
     #     model="gemma3n:e4b",
     #     prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
     # )

View File

@@ -1,12 +1,16 @@
 import json
+import os
 from typing import Literal
 import datetime
-from ollama import chat, ChatResponse
+from ollama import chat, ChatResponse, Client
 from openai import OpenAI
 from pydantic import BaseModel, Field
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 # This uses inferred filters — which means using LLM to create the metadata filters
@@ -109,7 +113,7 @@ class QueryGenerator:
         print(response)
         query = json.loads(response.output_parsed.extracted_metadata_fields)
-        # response: ChatResponse = chat(
+        # response: ChatResponse = ollama_client.chat(
         #     model="gemma3n:e4b",
         #     messages=[
         #         {"role": "system", "content": PROMPT},