Replace Ollama with llama-server (OpenAI-compatible API)

- Update llm.py to use OpenAI client with custom base_url for llama-server (see the sketch after this list)
- Update agents.py to use ChatOpenAI instead of ChatOllama
- Remove unused ollama imports from main.py, chunker.py, query.py
- Add LLAMA_SERVER_URL and LLAMA_MODEL_NAME env vars
- Remove ollama and langchain-ollama dependencies
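As a rough sketch of the llm.py side of this change (that file is not shown in the hunk below, so the function names and default values here are assumptions, not the actual contents), the OpenAI client is simply pointed at llama-server's OpenAI-compatible endpoint via the new env vars:

    # Hypothetical llm.py after this change.
    # llama-server exposes an OpenAI-compatible API, typically under http://<host>:<port>/v1.
    import os
    from openai import OpenAI

    def get_client() -> OpenAI:
        base_url = os.getenv("LLAMA_SERVER_URL", "http://localhost:8080/v1")
        # llama-server started without API-key enforcement ignores the key,
        # but the OpenAI client still requires a non-empty value.
        return OpenAI(base_url=base_url, api_key="not-needed")

    def chat(prompt: str) -> str:
        client = get_client()
        resp = client.chat.completions.create(
            model=os.getenv("LLAMA_MODEL_NAME", "llama-3.1-8b-instruct"),
            messages=[{"role": "user", "content": prompt}],
        )
        return resp.choices[0].message.content or ""

The "not-needed" placeholder key mirrors the one used for ChatOpenAI in the agents.py diff below.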

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit 32020a6c60
parent 713a058c4f
Date: 2026-01-31 21:39:23 -05:00
7 changed files with 35 additions and 71 deletions

agents.py
@@ -4,16 +4,26 @@ from typing import cast
 from langchain.agents import create_agent
 from langchain.chat_models import BaseChatModel
 from langchain.tools import tool
-from langchain_ollama import ChatOllama
 from langchain_openai import ChatOpenAI
 from tavily import AsyncTavilyClient
 from blueprints.rag.logic import query_vector_store
-openai_gpt_5_mini = ChatOpenAI(model="gpt-5-mini")
-ollama_deepseek = ChatOllama(model="llama3.1:8b", base_url=os.getenv("OLLAMA_URL"))
+# Configure LLM with llama-server or OpenAI fallback
+llama_url = os.getenv("LLAMA_SERVER_URL")
+if llama_url:
+    llama_chat = ChatOpenAI(
+        base_url=llama_url,
+        api_key="not-needed",
+        model=os.getenv("LLAMA_MODEL_NAME", "llama-3.1-8b-instruct"),
+    )
+else:
+    llama_chat = None
+openai_fallback = ChatOpenAI(model="gpt-5-mini")
 model_with_fallback = cast(
-    BaseChatModel, ollama_deepseek.with_fallbacks([openai_gpt_5_mini])
+    BaseChatModel,
+    llama_chat.with_fallbacks([openai_fallback]) if llama_chat else openai_fallback,
 )
 client = AsyncTavilyClient(os.getenv("TAVILY_KEY"), "")
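
A quick way to exercise the fallback chain configured above (a sketch; it assumes the module imports as agents and that OPENAI_API_KEY is set for the gpt-5-mini fallback):

    # If llama-server is unreachable, with_fallbacks retries the call on gpt-5-mini;
    # if LLAMA_SERVER_URL is unset, gpt-5-mini is used directly.
    from agents import model_with_fallback

    reply = model_with_fallback.invoke("Reply with the single word: ready")
    print(reply.content)

Note that with_fallbacks only triggers on errors, so a reachable llama-server that returns poor answers will never be routed to the fallback model.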