diff --git a/chunker.py b/chunker.py
index ec84bbe..abe6ae2 100644
--- a/chunker.py
+++ b/chunker.py
@@ -81,7 +81,7 @@ class Chunk:
 
 class Chunker:
     embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", ""),
+        url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
         model_name="mxbai-embed-large",
     )
 
diff --git a/cleaner.py b/cleaner.py
index 0ff3436..764251f 100644
--- a/cleaner.py
+++ b/cleaner.py
@@ -12,6 +12,9 @@ from request import PaperlessNGXService
 
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 parser = argparse.ArgumentParser(description="use llm to clean documents")
 parser.add_argument("document_id", type=str, help="questions about simba's health")
 
@@ -131,7 +134,7 @@ Someone will kill the innocent kittens if you don't extract the text exactly. So
 
 
 def summarize_pdf_image(filepaths: list[str]):
-    res = ollama.chat(
+    res = ollama_client.chat(
         model="gemma3:4b",
         messages=[
             {
diff --git a/docker-compose.yml b/docker-compose.yml
index cd05d9a..8263a14 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,28 +8,18 @@ services:
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
      - BASE_URL=${BASE_URL}
-      - OLLAMA_URL=${OLLAMA_URL:-http://ollama:11434}
+      - OLLAMA_URL=${OLLAMA_URL:-http://172.17.0.1:11434}
       - CHROMADB_PATH=/app/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
     volumes:
       - chromadb_data:/app/chromadb
-    depends_on:
-      - ollama
-    networks:
-      - raggr-network
-
-  ollama:
-    image: ollama/ollama:latest
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
     networks:
       - raggr-network
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 
 volumes:
   chromadb_data:
-  ollama_data:
 
 networks:
   raggr-network:
diff --git a/main.py b/main.py
index 1070113..1d13463 100644
--- a/main.py
+++ b/main.py
@@ -18,6 +18,9 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
 simba_docs = client.get_or_create_collection(name="simba_docs")
 feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
@@ -128,7 +131,7 @@ def consult_oracle(input: str, collection):
     # Generate
     print("Starting LLM generation")
     llm_start = time.time()
-    # output = ollama.generate(
+    # output = ollama_client.generate(
     #     model="gemma3n:e4b",
     #     prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
     # )
diff --git a/query.py b/query.py
index 453ed85..1e29be5 100644
--- a/query.py
+++ b/query.py
@@ -1,12 +1,16 @@
 import json
+import os
 from typing import Literal
 import datetime
 
-from ollama import chat, ChatResponse
+from ollama import chat, ChatResponse, Client
 from openai import OpenAI
 from pydantic import BaseModel, Field
 
+# Configure ollama client with URL from environment or default to localhost
+ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
+
 
 # This uses inferred filters — which means using LLM to create the metadata filters
 
 
@@ -109,7 +113,7 @@ class QueryGenerator:
         print(response)
         query = json.loads(response.output_parsed.extracted_metadata_fields)
 
-        # response: ChatResponse = chat(
+        # response: ChatResponse = ollama_client.chat(
         #     model="gemma3n:e4b",
         #     messages=[
         #         {"role": "system", "content": PROMPT},