Expanded context window, CLI'd the app, and added preprocessing
This commit is contained in:
3
main.py
3
main.py
@@ -32,7 +32,7 @@ def chunk_data(texts: list[str], collection):
|
|||||||
chunker = Chunker(collection)
|
chunker = Chunker(collection)
|
||||||
|
|
||||||
print(f"chunking {len(texts)} documents")
|
print(f"chunking {len(texts)} documents")
|
||||||
for text in texts[: len(texts) // 2]:
|
for text in texts:
|
||||||
chunker.chunk_document(document=text)
|
chunker.chunk_document(document=text)
|
||||||
|
|
||||||
|
|
||||||
@@ -40,7 +40,6 @@ def consult_oracle(input: str, collection):
|
|||||||
# Ask
|
# Ask
|
||||||
embeddings = Chunker.embedding_fx(input=[input])
|
embeddings = Chunker.embedding_fx(input=[input])
|
||||||
results = collection.query(query_texts=[input], query_embeddings=embeddings)
|
results = collection.query(query_texts=[input], query_embeddings=embeddings)
|
||||||
print(results)
|
|
||||||
|
|
||||||
# Generate
|
# Generate
|
||||||
output = ollama.generate(
|
output = ollama.generate(
|
||||||
|
|||||||
@@ -21,4 +21,4 @@ class PaperlessNGXService:
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pp = PaperlessNGXService()
|
pp = PaperlessNGXService()
|
||||||
print(pp.get_data()[0].keys())
|
pp.get_data()
|
||||||
|
|||||||
Reference in New Issue
Block a user