linter
This commit is contained in:
121
services/raggr/manage_vectorstore.py
Normal file
121
services/raggr/manage_vectorstore.py
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Management script for vector store operations."""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import sys
|
||||
|
||||
from blueprints.rag.logic import (
|
||||
get_vector_store_stats,
|
||||
index_documents,
|
||||
list_all_documents,
|
||||
vector_store,
|
||||
)
|
||||
|
||||
|
||||
def stats():
    """Print a short summary of the vector store.

    Fetches the summary mapping from ``get_vector_store_stats()`` and prints
    a banner followed by its ``collection_name`` and ``total_documents``
    entries, one per line.
    """
    summary = get_vector_store_stats()
    print("=== Vector Store Statistics ===")
    # Label/key pairs are printed in this fixed order, one line each.
    fields = (
        ("Collection", "collection_name"),
        ("Total Documents", "total_documents"),
    )
    for label, key in fields:
        print(f"{label}: {summary[key]}")
|
||||
|
||||
|
||||
async def index():
    """Run the document indexing coroutine and report the result.

    Prints two progress notices, awaits ``index_documents()``, prints a
    completion message, and then calls ``stats()`` to display the store
    summary.
    """
    progress_notices = (
        "Starting indexing process...",
        "Fetching documents from Paperless-NGX...",
    )
    for notice in progress_notices:
        print(notice)

    await index_documents()

    print("✓ Indexing complete!")
    stats()
|
||||
|
||||
|
||||
async def reindex():
    """Remove every stored document, then index again from scratch.

    Reads the ids currently held by the vector store's underlying collection,
    deletes them when there are any, and finally awaits ``index()``.
    """
    print("Clearing existing documents...")

    # NOTE(review): this reaches into the private ``_collection`` attribute of
    # ``vector_store``; confirm no public accessor exists before changing it.
    store = vector_store._collection
    existing_ids = store.get()["ids"]

    if not existing_ids:
        print("Collection is already empty")
    else:
        print(f"Deleting {len(existing_ids)} existing documents...")
        store.delete(ids=existing_ids)
        print("✓ Cleared")

    await index()
|
||||
|
||||
|
||||
def list_docs(limit: int = 10, show_content: bool = False):
    """Print the documents returned by ``list_all_documents``.

    Args:
        limit: Forwarded as the ``limit`` keyword to ``list_all_documents``.
        show_content: When true, also print each document's
            ``content_preview`` entry.
    """
    entries = list_all_documents(limit=limit)
    shown = len(entries)
    print(f"\n=== Documents (showing {shown}) ===\n")

    # Documents are numbered from 1 for display.
    for number, entry in enumerate(entries, start=1):
        print(f"Document {number}:")
        print(f" ID: {entry['id']}")
        print(f" Metadata: {entry['metadata']}")
        if show_content:
            print(f" Content: {entry['content_preview']}")
        # Blank line separates consecutive documents.
        print()
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the requested vector store command.

    Supported sub-commands are ``stats``, ``index``, ``reindex`` and ``list``
    (with an optional positional ``limit`` and a ``--show-content`` flag).
    When no sub-command is given, the help text is printed and the process
    exits with status 1. A ``KeyboardInterrupt`` or any other ``Exception``
    raised while running a command is reported and also exits with status 1.
    """
    usage_examples = """
Examples:
%(prog)s stats # Show vector store statistics
%(prog)s index # Index new documents from Paperless-NGX
%(prog)s reindex # Clear and reindex all documents
%(prog)s list 10 # List first 10 documents
%(prog)s list 20 --show-content # List 20 documents with content preview
"""
    parser = argparse.ArgumentParser(
        description="Manage vector store for RAG system",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = parser.add_subparsers(dest="command", help="Command to execute")

    # Sub-commands that take no arguments of their own.
    commands.add_parser("stats", help="Show vector store statistics")
    commands.add_parser("index", help="Index documents from Paperless-NGX")
    commands.add_parser("reindex", help="Clear and reindex all documents")

    # "list" accepts an optional count and a flag for content previews.
    listing = commands.add_parser("list", help="List documents in vector store")
    listing.add_argument(
        "limit", type=int, default=10, nargs="?", help="Number of documents to list"
    )
    listing.add_argument(
        "--show-content", action="store_true", help="Show content preview"
    )

    args = parser.parse_args()
    selected = args.command

    if not selected:
        parser.print_help()
        sys.exit(1)

    # Each command maps to a zero-argument callable; nothing runs until the
    # selected one is invoked inside the try block below.
    handlers = {
        "stats": lambda: stats(),
        "index": lambda: asyncio.run(index()),
        "reindex": lambda: asyncio.run(reindex()),
        "list": lambda: list_docs(
            limit=args.limit, show_content=args.show_content
        ),
    }

    try:
        handler = handlers.get(selected)
        if handler is not None:
            handler()
    except KeyboardInterrupt:
        print("\n\nOperation cancelled by user")
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report the failure on stderr and exit non-zero.
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user