This commit is contained in:
Ryan
2025-11-19 20:53:24 -05:00
parent 0e8ff964ae
commit 561bf4f71f
2 changed files with 6 additions and 5 deletions

View File

@@ -0,0 +1,3 @@
# simbarag-index
This is going to be the LLM portion of SimbaRag, refactored out into its own service so that it does not block the startup of the main app.

View File

@@ -89,14 +89,12 @@ async def reindex_all_paperless_documents():
if all_docs["ids"]:
simba_docs.delete(ids=all_docs["ids"])
visited_documents = set(
await PaperlessDocument.all().values_list("paperless_id", flat=True)
)
# Fetch all documents again
ppngx = PaperlessNGXService()
docs = ppngx.get_data()
docs = [doc for doc in docs if doc["id"] not in visited_documents]
doctype_lookup = ppngx.get_doctypes()
# Chunk them
await chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
return ""