From 994b3fdf1f54a2d6bdf3a1fc564ba2eb8b7617d0 Mon Sep 17 00:00:00 2001
From: Ryan Chen <ryan@torrtle.co>
Date: Sat, 26 Jul 2025 19:23:08 -0400
Subject: [PATCH 1/2] Embed chunks and queries with OllamaEmbeddingFunction

---
 main.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index ff890b2..9069559 100644
--- a/main.py
+++ b/main.py
@@ -7,6 +7,10 @@ from math import ceil
 
 import chromadb
 
+from chromadb.utils.embedding_functions.ollama_embedding_function import (
+    OllamaEmbeddingFunction,
+)
+
 client = chromadb.EphemeralClient()
 collection = client.create_collection(name="docs")
 
@@ -29,6 +33,11 @@ class Chunk:
 
 class Chunker:
     def __init__(self) -> None:
+        self.embedding_fx = OllamaEmbeddingFunction(
+            url="http://localhost:11434",
+            model_name="mxbai-embed-large",
+        )
+
         pass
 
     def chunk_document(self, document: str, chunk_size: int = 300) -> list[Chunk]:
@@ -47,15 +56,20 @@ class Chunker:
             )
             text_chunk = document[curr_pos:to_pos]
 
+            embedding = self.embedding_fx([text_chunk])
             collection.add(
                 ids=[str(doc_uuid) + ":" + str(i)],
                 documents=[text_chunk],
+                embeddings=embedding,
             )
 
         return chunks
 
 
-# Setup
+embedding_fx = OllamaEmbeddingFunction(
+    url="http://localhost:11434",
+    model_name="mxbai-embed-large",
+)
 
 # Step 1: Get the text
 ppngx = PaperlessNGXService()
@@ -71,8 +85,8 @@ for text in texts:
 
 # Ask
 input = "How many teeth has Simba had removed?"
-response = ollama.embed(model="mxbai-embed-large", input=input)
-results = collection.query(query_texts=[input], n_results=1)
+embeddings = embedding_fx(input=[input])
+results = collection.query(query_texts=[input], query_embeddings=embeddings)
 print(results)
 # Generate
 output = ollama.generate(
-- 
2.49.1


From 943a22401b683c19a75e2c0ba94bfdf78757996e Mon Sep 17 00:00:00 2001
From: Ryan Chen <ryan@torrtle.co>
Date: Sat, 26 Jul 2025 19:46:55 -0400
Subject: [PATCH 2/2] Read OLLAMA_URL via getenv in main; add request.py main guard

---
 main.py    | 11 ++++++++---
 request.py |  5 +++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index 9069559..e911e69 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,5 @@
 import ollama
+import os
 from uuid import uuid4, UUID
 
 from request import PaperlessNGXService
@@ -11,9 +12,13 @@ from chromadb.utils.embedding_functions.ollama_embedding_function import (
     OllamaEmbeddingFunction,
 )
 
+from dotenv import load_dotenv
+
 client = chromadb.EphemeralClient()
 collection = client.create_collection(name="docs")
 
+load_dotenv()
+
 
 class Chunk:
     def __init__(
@@ -34,7 +39,7 @@ class Chunk:
 class Chunker:
     def __init__(self) -> None:
         self.embedding_fx = OllamaEmbeddingFunction(
-            url="http://localhost:11434",
+            url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
             model_name="mxbai-embed-large",
         )
 
@@ -67,7 +72,7 @@ class Chunker:
 
 
 embedding_fx = OllamaEmbeddingFunction(
-    url="http://localhost:11434",
+    url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
     model_name="mxbai-embed-large",
 )
 
@@ -84,7 +89,7 @@ for text in texts:
     chunker.chunk_document(document=text)
 
 # Ask
-input = "How many teeth has Simba had removed?"
+input = "How many teeth has Simba had removed? Who is his current vet?"
 embeddings = embedding_fx(input=[input])
 results = collection.query(query_texts=[input], query_embeddings=embeddings)
 print(results)
diff --git a/request.py b/request.py
index 7770fcf..229d619 100644
--- a/request.py
+++ b/request.py
@@ -17,3 +17,8 @@ class PaperlessNGXService:
         print(f"Getting data from: {self.url}")
         r = httpx.get(self.url, headers=self.headers)
         return r.json()["results"]
+
+
+if __name__ == "__main__":
+    pp = PaperlessNGXService()
+    print(pp.get_data()[0].keys())
-- 
2.49.1