yeet
This commit is contained in:
@@ -0,0 +1,3 @@
|
|||||||
|
# simbarag-index
|
||||||
|
|
||||||
|
This is going to be the LLM portion of SimbaRag, refactored out into this service so as not to block the startup time of the main app.
|
||||||
|
|||||||
8
main.py
8
main.py
@@ -89,14 +89,12 @@ async def reindex_all_paperless_documents():
|
|||||||
if all_docs["ids"]:
|
if all_docs["ids"]:
|
||||||
simba_docs.delete(ids=all_docs["ids"])
|
simba_docs.delete(ids=all_docs["ids"])
|
||||||
|
|
||||||
visited_documents = set(
|
# Fetch all documents again
|
||||||
await PaperlessDocument.all().values_list("paperless_id", flat=True)
|
|
||||||
)
|
|
||||||
|
|
||||||
ppngx = PaperlessNGXService()
|
ppngx = PaperlessNGXService()
|
||||||
docs = ppngx.get_data()
|
docs = ppngx.get_data()
|
||||||
docs = [doc for doc in docs if doc["id"] not in visited_documents]
|
|
||||||
doctype_lookup = ppngx.get_doctypes()
|
doctype_lookup = ppngx.get_doctypes()
|
||||||
|
|
||||||
|
# Chunk them
|
||||||
await chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
|
await chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
Reference in New Issue
Block a user