Configure ollama to use external host instead of docker service

- Update all ollama clients to use a configurable OLLAMA_URL environment variable
- Remove the ollama service from docker-compose.yml in favor of an external ollama instance
- Configure docker-compose to reach the host's ollama via 172.17.0.1:11434 (Linux) or host.docker.internal (macOS/Windows)
- Add cross-platform compatibility with an extra_hosts mapping (see the sketch after this list)
- Update the embedding function's fallback URL for consistency
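A minimal sketch of the client-side pattern the touched Python files now share. The OLLAMA_URL variable and ollama.Client(host=...) usage come from this commit; the resolve_ollama_url helper is illustrative only, not repo code.

# Illustrative sketch, not part of the commit: every client reads OLLAMA_URL
# and falls back to a local Ollama instance when the variable is unset.
import os
import ollama

def resolve_ollama_url() -> str:
    # docker-compose injects OLLAMA_URL (e.g. http://172.17.0.1:11434);
    # on a bare host the localhost default applies.
    return os.getenv("OLLAMA_URL", "http://localhost:11434")

ollama_client = ollama.Client(host=resolve_ollama_url())

Because only the environment variable changes between environments, the same code runs unchanged on the host and inside the container.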

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-02 20:29:48 -04:00
parent a640ae5fed
commit 03b033e9a4
5 changed files with 18 additions and 18 deletions

View File

@@ -81,7 +81,7 @@ class Chunk:
 class Chunker:
     embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", ""),
+        url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
         model_name="mxbai-embed-large",
     )

View File

@@ -12,6 +12,9 @@ from request import PaperlessNGXService
 load_dotenv()
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
 parser = argparse.ArgumentParser(description="use llm to clean documents")
 parser.add_argument("document_id", type=str, help="questions about simba's health")
@@ -131,7 +134,7 @@ Someone will kill the innocent kittens if you don't extract the text exactly. So
 def summarize_pdf_image(filepaths: list[str]):
-    res = ollama.chat(
+    res = ollama_client.chat(
         model="gemma3:4b",
         messages=[
             {

View File

@@ -8,28 +8,18 @@ services:
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
       - BASE_URL=${BASE_URL}
-      - OLLAMA_URL=${OLLAMA_URL:-http://ollama:11434}
+      - OLLAMA_URL=${OLLAMA_URL:-http://172.17.0.1:11434}
       - CHROMADB_PATH=/app/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
     volumes:
       - chromadb_data:/app/chromadb
-    depends_on:
-      - ollama
     networks:
       - raggr-network
-  ollama:
-    image: ollama/ollama:latest
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    networks:
-      - raggr-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 volumes:
   chromadb_data:
-  ollama_data:
 networks:
   raggr-network:
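A hypothetical way to confirm the container can still reach ollama after this change (not part of the commit; assumes the stock Ollama HTTP server, which answers GET / with a short status string). Run it inside the application container:

# Hypothetical connectivity check, not repo code.
import os
import urllib.request

url = os.getenv("OLLAMA_URL", "http://172.17.0.1:11434")
with urllib.request.urlopen(url, timeout=5) as resp:
    # Expect HTTP 200 and a plain-text "Ollama is running" style message.
    print(resp.status, resp.read().decode())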

View File

@@ -18,6 +18,9 @@ from dotenv import load_dotenv
 load_dotenv()
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
 client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
 simba_docs = client.get_or_create_collection(name="simba_docs")
 feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
@@ -128,7 +131,7 @@ def consult_oracle(input: str, collection):
     # Generate
     print("Starting LLM generation")
     llm_start = time.time()
-    # output = ollama.generate(
+    # output = ollama_client.generate(
     #     model="gemma3n:e4b",
     #     prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
     # )

View File

@@ -1,12 +1,16 @@
 import json
 import os
 from typing import Literal
 import datetime
-from ollama import chat, ChatResponse
+from ollama import chat, ChatResponse, Client
 from openai import OpenAI
 from pydantic import BaseModel, Field
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
# This uses inferred filters — which means using LLM to create the metadata filters
@@ -109,7 +113,7 @@ class QueryGenerator:
         print(response)
         query = json.loads(response.output_parsed.extracted_metadata_fields)
-        # response: ChatResponse = chat(
+        # response: ChatResponse = ollama_client.chat(
         #     model="gemma3n:e4b",
         #     messages=[
         #         {"role": "system", "content": PROMPT},