Replace Ollama with llama-server (OpenAI-compatible API)

- Update llm.py to use OpenAI client with custom base_url for llama-server
- Update agents.py to use ChatOpenAI instead of ChatOllama
- Remove unused ollama imports from main.py, chunker.py, query.py
- Add LLAMA_SERVER_URL and LLAMA_MODEL_NAME env vars
- Remove ollama and langchain-ollama dependencies

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 21:39:23 -05:00
parent 713a058c4f
commit 32020a6c60
7 changed files with 35 additions and 71 deletions
@@ -3,7 +3,6 @@ from math import ceil
 import re
 from typing import Union
 from uuid import UUID, uuid4
-from ollama import Client
 from chromadb.utils.embedding_functions.openai_embedding_function import (
    OpenAIEmbeddingFunction,
 )
@@ -13,10 +12,6 @@ from llm import LLMClient

 load_dotenv()

-ollama_client = Client(
-    host=os.getenv("OLLAMA_HOST", "http://localhost:11434"), timeout=1.0
-)
-

 def remove_headers_footers(text, header_patterns=None, footer_patterns=None):
    if header_patterns is None: