diff --git a/.env.example b/.env.example
index 50829cc..c732b96 100644
--- a/.env.example
+++ b/.env.example
@@ -19,6 +19,13 @@ BASE_URL=192.168.1.5:8000
 LLAMA_SERVER_URL=http://192.168.1.213:8080/v1
 LLAMA_MODEL_NAME=llama-3.1-8b-instruct
 
+# Embedding Server Configuration
+# EMBEDDING_SERVER_URL: OpenAI-compatible embedding server (e.g. llama-server).
+#   Leave unset or empty to use OpenAI embeddings instead.
+# EMBEDDING_MODEL_NAME: model to request; defaults to text-embedding-3-small.
+EMBEDDING_SERVER_URL=http://192.168.1.7:8086/v1
+EMBEDDING_MODEL_NAME=all-minilm
+
 # OpenAI Configuration
 OPENAI_API_KEY=your-openai-api-key
 
diff --git a/blueprints/rag/logic.py b/blueprints/rag/logic.py
index 2d5772f..ce07fe0 100644
--- a/blueprints/rag/logic.py
+++ b/blueprints/rag/logic.py
@@ -17,7 +17,25 @@ load_dotenv()
 
 logger = logging.getLogger(__name__)
 
-embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+# Embedding backend selection.  Empty values are treated like unset ones:
+# docker-compose passes "" for variables missing from the host environment,
+# and os.getenv() only applies its default when the variable is absent.
+_embedding_server_url = os.getenv("EMBEDDING_SERVER_URL", "").strip()
+_embedding_model = (
+    os.getenv("EMBEDDING_MODEL_NAME", "").strip() or "text-embedding-3-small"
+)
+
+if _embedding_server_url:
+    # Custom OpenAI-compatible server (e.g. llama-server).
+    embeddings = OpenAIEmbeddings(
+        model=_embedding_model,
+        base_url=_embedding_server_url,
+        # Placeholder value; presumably the local server does not check it.
+        api_key="not-needed",
+        # Send raw text instead of tiktoken token IDs, which many
+        # OpenAI-compatible servers do not accept.
+        check_embedding_ctx_length=False,
+    )
+else:
+    # Hosted OpenAI API with the library's default credential lookup.
+    embeddings = OpenAIEmbeddings(model=_embedding_model)
 
 # Convert Tortoise-style postgres:// URL to SQLAlchemy-style postgresql+psycopg://
 _db_url = os.getenv(
diff --git a/docker-compose.yml b/docker-compose.yml
index b57806d..0136a87 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,6 +31,8 @@ services:
       - BASE_URL=${BASE_URL}
       - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434}
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - EMBEDDING_SERVER_URL=${EMBEDDING_SERVER_URL:-}
+      - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME:-}
       - JWT_SECRET_KEY=${JWT_SECRET_KEY}
       - LLAMA_SERVER_URL=${LLAMA_SERVER_URL}
       - LLAMA_MODEL_NAME=${LLAMA_MODEL_NAME}