Expanded context window, CLI'd the app, and added preprocessing

This commit is contained in:
2025-07-30 19:58:29 -04:00
parent b43ef63449
commit 0a88a03c90
2 changed files with 2 additions and 3 deletions

View File

@@ -32,7 +32,7 @@ def chunk_data(texts: list[str], collection):
chunker = Chunker(collection) chunker = Chunker(collection)
print(f"chunking {len(texts)} documents") print(f"chunking {len(texts)} documents")
for text in texts[: len(texts) // 2]: for text in texts:
chunker.chunk_document(document=text) chunker.chunk_document(document=text)
@@ -40,7 +40,6 @@ def consult_oracle(input: str, collection):
# Ask # Ask
embeddings = Chunker.embedding_fx(input=[input]) embeddings = Chunker.embedding_fx(input=[input])
results = collection.query(query_texts=[input], query_embeddings=embeddings) results = collection.query(query_texts=[input], query_embeddings=embeddings)
print(results)
# Generate # Generate
output = ollama.generate( output = ollama.generate(

View File

@@ -21,4 +21,4 @@ class PaperlessNGXService:
if __name__ == "__main__": if __name__ == "__main__":
pp = PaperlessNGXService() pp = PaperlessNGXService()
print(pp.get_data()[0].keys()) pp.get_data()