From 3ffc95a1b042118b8ff5bea6c8593ffebc06d20e Mon Sep 17 00:00:00 2001
From: Ryan Chen
Date: Thu, 2 Oct 2025 21:05:17 -0400
Subject: [PATCH] Switch to OpenAI embeddings for ChromaDB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace Ollama embedding function with OpenAI's text-embedding-3-small
model for improved embedding quality and consistency.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 chunker.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chunker.py b/chunker.py
index 55f3b54..0ec6b21 100644
--- a/chunker.py
+++ b/chunker.py
@@ -4,8 +4,8 @@ import re
 from typing import Union
 from uuid import UUID, uuid4
 
-from chromadb.utils.embedding_functions.ollama_embedding_function import (
-    OllamaEmbeddingFunction,
+from chromadb.utils.embedding_functions.openai_embedding_function import (
+    OpenAIEmbeddingFunction,
 )
 from dotenv import load_dotenv
 
@@ -80,9 +80,9 @@ class Chunk:
 
 
 class Chunker:
-    embedding_fx = OllamaEmbeddingFunction(
-        url=os.getenv("OLLAMA_URL", "http://host.docker.internal:11434"),
-        model_name="mxbai-embed-large",
+    embedding_fx = OpenAIEmbeddingFunction(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        model_name="text-embedding-3-small",
     )
 
     def __init__(self, collection) -> None: