From 561bf4f71f81242d50a5100bbe861a8eb296f384 Mon Sep 17 00:00:00 2001 From: Ryan Date: Wed, 19 Nov 2025 20:53:24 -0500 Subject: [PATCH] yeet --- README.md | 3 +++ main.py | 8 +++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e69de29..621fc2b 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,3 @@ +# simbarag-index + +This is going to be the LLM portion of SimbaRag, refactored out into this service so as not to block the startup time of the main app. diff --git a/main.py b/main.py index 101d36a..e9debec 100644 --- a/main.py +++ b/main.py @@ -89,14 +89,12 @@ async def reindex_all_paperless_documents(): if all_docs["ids"]: simba_docs.delete(ids=all_docs["ids"]) - visited_documents = set( - await PaperlessDocument.all().values_list("paperless_id", flat=True) - ) - + # Fetch all documents again ppngx = PaperlessNGXService() docs = ppngx.get_data() - docs = [doc for doc in docs if doc["id"] not in visited_documents] doctype_lookup = ppngx.get_doctypes() + + # Chunk them await chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup) return ""