reorganization

2026-01-31 17:13:27 -05:00
parent 1fd2e860b2
commit ad39904dda
87 changed files with 1019 additions and 237 deletions

scripts/__init__.py Normal file (0 additions)

scripts/add_user.py Normal file (146 additions)

@@ -0,0 +1,146 @@
# GENERATED BY CLAUDE
import os
import sys
import uuid
import asyncio
from tortoise import Tortoise
from blueprints.users.models import User
from dotenv import load_dotenv
load_dotenv()
# Database configuration with environment variable support
DATABASE_PATH = os.getenv("DATABASE_PATH", "database/raggr.db")
DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite://{DATABASE_PATH}")
print(DATABASE_URL)
async def add_user(username: str, email: str, password: str):
"""Add a new user to the database"""
await Tortoise.init(
db_url=DATABASE_URL,
modules={
"models": [
"blueprints.users.models",
"blueprints.conversation.models",
]
},
)
try:
# Check if user already exists
existing_user = await User.filter(email=email).first()
if existing_user:
print(f"Error: User with email '{email}' already exists!")
return False
existing_username = await User.filter(username=username).first()
if existing_username:
print(f"Error: Username '{username}' is already taken!")
return False
# Create new user
user = User(
id=uuid.uuid4(),
username=username,
email=email,
)
user.set_password(password)
await user.save()
print("✓ User created successfully!")
print(f" Username: {username}")
print(f" Email: {email}")
print(f" ID: {user.id}")
return True
except Exception as e:
print(f"Error creating user: {e}")
return False
finally:
await Tortoise.close_connections()
async def list_users():
"""List all users in the database"""
await Tortoise.init(
db_url=DATABASE_URL,
modules={
"models": [
"blueprints.users.models",
"blueprints.conversation.models",
]
},
)
try:
users = await User.all()
if not users:
print("No users found in database.")
return
print(f"\nFound {len(users)} user(s):")
print("-" * 60)
for user in users:
print(f"Username: {user.username}")
print(f"Email: {user.email}")
print(f"ID: {user.id}")
print(f"Created: {user.created_at}")
print("-" * 60)
except Exception as e:
print(f"Error listing users: {e}")
finally:
await Tortoise.close_connections()
def print_usage():
"""Print usage instructions"""
print("Usage:")
print(" python add_user.py add <username> <email> <password>")
print(" python add_user.py list")
print("\nExamples:")
print(" python add_user.py add ryan ryan@example.com mypassword123")
print(" python add_user.py list")
print("\nEnvironment Variables:")
print(" DATABASE_PATH - Path to database file (default: database/raggr.db)")
print(" DATABASE_URL - Full database URL (overrides DATABASE_PATH)")
print("\n Example with custom database:")
print(" DATABASE_PATH=dev.db python add_user.py list")
async def main():
if len(sys.argv) < 2:
print_usage()
sys.exit(1)
command = sys.argv[1].lower()
if command == "add":
if len(sys.argv) != 5:
print("Error: Missing arguments for 'add' command")
print_usage()
sys.exit(1)
username = sys.argv[2]
email = sys.argv[3]
password = sys.argv[4]
success = await add_user(username, email, password)
sys.exit(0 if success else 1)
elif command == "list":
await list_users()
sys.exit(0)
else:
print(f"Error: Unknown command '{command}'")
print_usage()
sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())

scripts/index_immich.py Normal file (118 additions)

@@ -0,0 +1,118 @@
import logging
import os
import sqlite3
import httpx
from dotenv import load_dotenv
import sys
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from utils.image_process import describe_simba_image
from utils.request import PaperlessNGXService
logging.basicConfig(level=logging.INFO)
load_dotenv()
# Configuration from environment variables
IMMICH_URL = os.getenv("IMMICH_URL", "http://localhost:2283")
API_KEY = os.getenv("IMMICH_API_KEY")
PERSON_NAME = os.getenv("PERSON_NAME", "Simba") # Name of the tagged person/pet
DOWNLOAD_DIR = os.getenv("DOWNLOAD_DIR", "./simba_photos")
# Set up headers
headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}
VISITED = set()  # immich asset ids already processed (loaded from sqlite below)
if __name__ == "__main__":
conn = sqlite3.connect("./database/visited.db")
c = conn.cursor()
c.execute("select immich_id from visited")
rows = c.fetchall()
for row in rows:
VISITED.add(row[0])
ppngx = PaperlessNGXService()
    people_url = f"{IMMICH_URL}/api/search/person?name={PERSON_NAME}"
people = httpx.get(people_url, headers=headers).json()
simba_id = people[0]["id"]
ids = {}
asset_search = f"{IMMICH_URL}/api/search/smart"
request_body = {"query": "orange cat"}
results = httpx.post(asset_search, headers=headers, json=request_body)
assets = results.json()["assets"]
for asset in assets["items"]:
if asset["type"] == "IMAGE" and asset["id"] not in VISITED:
ids[asset["id"]] = asset.get("originalFileName")
nextPage = assets.get("nextPage")
# while nextPage != None:
# logging.info(f"next page: {nextPage}")
# request_body["page"] = nextPage
# results = httpx.post(asset_search, headers=headers, json=request_body)
# assets = results.json()["assets"]
# for asset in assets["items"]:
# if asset["type"] == "IMAGE":
# ids.add(asset['id'])
# nextPage = assets.get("nextPage")
asset_search = f"{IMMICH_URL}/api/search/smart"
request_body = {"query": "simba"}
results = httpx.post(asset_search, headers=headers, json=request_body)
for asset in results.json()["assets"]["items"]:
if asset["type"] == "IMAGE":
ids[asset["id"]] = asset.get("originalFileName")
for immich_asset_id, immich_filename in ids.items():
try:
response = httpx.get(
f"{IMMICH_URL}/api/assets/{immich_asset_id}/original", headers=headers
)
path = os.path.join("/Users/ryanchen/Programs/raggr", immich_filename)
            # Write the download to disk and close the handle before processing
            with open(path, "wb+") as image_file:
                for chunk in response.iter_bytes(chunk_size=8192):
                    image_file.write(chunk)
logging.info("Processing image ...")
description = describe_simba_image(path)
image_description = description.description
image_date = description.image_date
description_filepath = os.path.join(
"/Users/ryanchen/Programs/raggr", "SIMBA_DESCRIBE_001.txt"
)
file = open(description_filepath, "w+")
file.write(image_description)
file.close()
file = open(description_filepath, "rb")
ppngx.upload_description(
description_filepath=description_filepath,
file=file,
title="SIMBA_DESCRIBE_001.txt",
exif_date=image_date,
)
file.close()
c.execute("INSERT INTO visited (immich_id) values (?)", (immich_asset_id,))
conn.commit()
logging.info("Processing complete. Deleting file.")
os.remove(file.name)
        except Exception as e:
            logging.error(f"something went wrong for {immich_filename}")
            logging.error(e)
conn.close()
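The commented-out pagination loop above never made it into the run path, and as written it calls ids.add(...) on what is a dict elsewhere in the script. A minimal corrected sketch, assuming the smart-search endpoint keeps returning a nextPage token until results are exhausted:

# Sketch of the pagination loop the comments gesture at (not in the original run path)
next_page = assets.get("nextPage")
while next_page is not None:
    logging.info(f"next page: {next_page}")
    request_body["page"] = next_page
    results = httpx.post(asset_search, headers=headers, json=request_body)
    assets = results.json()["assets"]
    for asset in assets["items"]:
        if asset["type"] == "IMAGE" and asset["id"] not in VISITED:
            ids[asset["id"]] = asset.get("originalFileName")
    next_page = assets.get("nextPage")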


@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""CLI tool to inspect the vector store contents."""
import argparse
import asyncio
import os
from dotenv import load_dotenv
from blueprints.rag.logic import (
get_vector_store_stats,
index_documents,
list_all_documents,
)
# Load .env from the repo root (assumes this script sits one level below it, in scripts/)
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
env_path = os.path.join(root_dir, ".env")
load_dotenv(env_path)
def print_stats():
"""Print vector store statistics."""
stats = get_vector_store_stats()
print("=== Vector Store Statistics ===")
print(f"Collection Name: {stats['collection_name']}")
print(f"Total Documents: {stats['total_documents']}")
print()
def print_documents(limit: int = 10, show_content: bool = False):
"""Print documents in the vector store."""
docs = list_all_documents(limit=limit)
print(f"=== Documents (showing {len(docs)} of {limit} requested) ===\n")
for i, doc in enumerate(docs, 1):
print(f"Document {i}:")
print(f" ID: {doc['id']}")
print(f" Metadata: {doc['metadata']}")
if show_content:
print(f" Content Preview: {doc['content_preview']}")
print()
async def run_index():
"""Run the indexing process."""
print("Starting indexing process...")
await index_documents()
print("Indexing complete!")
print_stats()
def main():
parser = argparse.ArgumentParser(description="Inspect the vector store contents")
parser.add_argument(
"--stats", action="store_true", help="Show vector store statistics"
)
parser.add_argument(
"--list", type=int, metavar="N", help="List N documents from the vector store"
)
parser.add_argument(
"--show-content",
action="store_true",
help="Show content preview when listing documents",
)
parser.add_argument(
"--index",
action="store_true",
help="Index documents from Paperless-NGX into the vector store",
)
args = parser.parse_args()
# Handle indexing first if requested
if args.index:
asyncio.run(run_index())
return
# If no arguments provided, show stats by default
if not any([args.stats, args.list]):
args.stats = True
if args.stats:
print_stats()
if args.list:
print_documents(limit=args.list, show_content=args.show_content)
if __name__ == "__main__":
main()


@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""Management script for vector store operations."""
import argparse
import asyncio
import sys
from blueprints.rag.logic import (
get_vector_store_stats,
index_documents,
list_all_documents,
vector_store,
)
def stats():
    """Show vector store statistics."""
    info = get_vector_store_stats()
    print("=== Vector Store Statistics ===")
    print(f"Collection: {info['collection_name']}")
    print(f"Total Documents: {info['total_documents']}")
async def index():
"""Index documents from Paperless-NGX."""
print("Starting indexing process...")
print("Fetching documents from Paperless-NGX...")
await index_documents()
print("✓ Indexing complete!")
stats()
async def reindex():
"""Clear and reindex all documents."""
print("Clearing existing documents...")
collection = vector_store._collection
all_docs = collection.get()
if all_docs["ids"]:
print(f"Deleting {len(all_docs['ids'])} existing documents...")
collection.delete(ids=all_docs["ids"])
print("✓ Cleared")
else:
print("Collection is already empty")
await index()
def list_docs(limit: int = 10, show_content: bool = False):
"""List documents in the vector store."""
docs = list_all_documents(limit=limit)
print(f"\n=== Documents (showing {len(docs)}) ===\n")
for i, doc in enumerate(docs, 1):
print(f"Document {i}:")
print(f" ID: {doc['id']}")
print(f" Metadata: {doc['metadata']}")
if show_content:
print(f" Content: {doc['content_preview']}")
print()
def main():
parser = argparse.ArgumentParser(
description="Manage vector store for RAG system",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s stats # Show vector store statistics
%(prog)s index # Index new documents from Paperless-NGX
%(prog)s reindex # Clear and reindex all documents
%(prog)s list 10 # List first 10 documents
%(prog)s list 20 --show-content # List 20 documents with content preview
""",
)
subparsers = parser.add_subparsers(dest="command", help="Command to execute")
# Stats command
subparsers.add_parser("stats", help="Show vector store statistics")
# Index command
subparsers.add_parser("index", help="Index documents from Paperless-NGX")
# Reindex command
subparsers.add_parser("reindex", help="Clear and reindex all documents")
# List command
list_parser = subparsers.add_parser("list", help="List documents in vector store")
list_parser.add_argument(
"limit", type=int, default=10, nargs="?", help="Number of documents to list"
)
list_parser.add_argument(
"--show-content", action="store_true", help="Show content preview"
)
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
try:
if args.command == "stats":
stats()
elif args.command == "index":
asyncio.run(index())
elif args.command == "reindex":
asyncio.run(reindex())
elif args.command == "list":
list_docs(limit=args.limit, show_content=args.show_content)
except KeyboardInterrupt:
print("\n\nOperation cancelled by user")
sys.exit(1)
except Exception as e:
print(f"\n❌ Error: {e}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":
main()
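reindex clears the store by reaching into vector_store._collection, a private attribute of what looks like LangChain's Chroma wrapper. Assuming that is the wrapper in use, the same clearing step has a public-API spelling; a sketch:

# Hedged sketch, assuming vector_store is langchain's Chroma wrapper
existing = vector_store.get()  # public accessor; returns {"ids": [...], ...}
if existing["ids"]:
    vector_store.delete(ids=existing["ids"])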


@@ -0,0 +1,24 @@
from bs4 import BeautifulSoup
import chromadb
import httpx
client = chromadb.PersistentClient(path="/Users/ryanchen/Programs/raggr/chromadb")
# Scrape
BASE_URL = "https://www.vet.cornell.edu"
LIST_URL = "/departments-centers-and-institutes/cornell-feline-health-center/health-information/feline-health-topics"
QUERY_URL = BASE_URL + LIST_URL
r = httpx.get(QUERY_URL)
soup = BeautifulSoup(r.text, "html.parser")
container = soup.find("div", class_="field-body")
a_s = container.find_all("a", href=True)
new_texts = []
for link in a_s:
    endpoint = link["href"]
    query_url = BASE_URL + endpoint
    r2 = httpx.get(query_url)
    article_soup = BeautifulSoup(r2.text, "html.parser")
    # The committed script stops here; collecting the page text is the
    # minimal plausible completion of the loop.
    new_texts.append(article_soup.get_text(separator="\n", strip=True))
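The chromadb client is created but never written to in the script as committed. A minimal sketch of the likely next step, assuming a hypothetical collection name "feline_health" and chromadb's default embedding function:

# Hedged sketch; "feline_health" is a hypothetical collection name, not from the original.
collection = client.get_or_create_collection("feline_health")
collection.add(
    documents=new_texts,
    ids=[f"cornell-feline-{i}" for i in range(len(new_texts))],
)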

scripts/query.py Normal file (251 additions)

@@ -0,0 +1,251 @@
import json
import os
from typing import Literal
import datetime
from ollama import Client
from openai import OpenAI
from pydantic import BaseModel, Field
# Configure ollama client with URL from environment or default to localhost
ollama_client = Client(
host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=10.0
)
# This uses inferred filters: an LLM generates the metadata filters from the query
class FilterOperation(BaseModel):
op: Literal["$gt", "$gte", "$eq", "$ne", "$lt", "$lte", "$in", "$nin"]
value: str | list[str]
class FilterQuery(BaseModel):
    field_name: Literal["created_date", "tags"]
op: FilterOperation
class AndQuery(BaseModel):
op: Literal["$and", "$or"]
subqueries: list[FilterQuery]
class GeneratedQuery(BaseModel):
fields: list[str]
extracted_metadata_fields: str
class Time(BaseModel):
time: int
DOCTYPE_OPTIONS = [
"Bill",
"Image Description",
"Insurance",
"Medical Record",
"Documentation",
"Letter",
]
QUERY_TYPE_OPTIONS = [
"Simba",
"Other",
]
class DocumentType(BaseModel):
type: list[str] = Field(description="type of document", enum=DOCTYPE_OPTIONS)
class QueryType(BaseModel):
    type: str = Field(description="type of query", enum=QUERY_TYPE_OPTIONS)
PROMPT = """
You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
a cat, Simba, and its records. The types of records are listed below. Using the query, extract
the date range the user is trying to query. Return it as JSON. The date tag is created_date. Return the date in epoch time.
If the created_date cannot be ascertained, set it to epoch time start.
You have several operators at your disposal:
- $gt: greater than
- $gte: greater than or equal
- $eq: equal
- $ne: not equal
- $lt: less than
- $lte: less than or equal to
- $in: in
- $nin: not in
Logical operators:
- $and, $or
### Example 1
Query: "Who is Simba's current vet?"
Metadata fields: {"created_date"}
Extracted metadata fields: {"created_date": {"$gt": "2025-01-01"}}
### Example 2
Query: "How many teeth has Simba had removed?"
Metadata fields: {}
Extracted metadata fields: {}
### Example 3
Query: "How many times has Simba been to the vet this year?"
Metadata fields: {"created_date"}
Extracted metadata fields: {"created_date": {"$gt": "2025-01-01"}}
document_types:
- aftercare
- bill
- insurance claim
- medical records
Only return the extracted metadata fields. Make sure the extracted metadata fields are valid JSON
"""
DOCTYPE_PROMPT = f"""You are an information specialist that processes user queries. A query can have two tags attached from the following options. Based on the query, determine which of the following options is most appropriate: {",".join(DOCTYPE_OPTIONS)}
### Example 1
Query: "Who is Simba's current vet?"
Tags: ["Bill", "Medical Record"]
### Example 2
Query: "Who does Simba know?"
Tags: ["Letter", "Documentation"]
"""
QUERY_TYPE_PROMPT = f"""You are an information specialist that processes user queries.
A query can have one tag attached from the following options. Based on the query and the transcript which is listed below, determine
which of the following options is most appropriate: {",".join(QUERY_TYPE_OPTIONS)}
### Example 1
Query: "Who is Simba's current vet?"
Tags: ["Simba"]
### Example 2
Query: "What is the capital of Tokyo?"
Tags: ["Other"]
### Example 3
Query: "Can you help me write an email?"
Tags: ["Other"]
TRANSCRIPT:
"""
class QueryGenerator:
def __init__(self) -> None:
pass
def date_to_epoch(self, date_str: str) -> float:
split_date = date_str.split("-")
date = datetime.datetime(
int(split_date[0]),
int(split_date[1]),
int(split_date[2]),
0,
0,
0,
)
return date.timestamp()
def get_doctype_query(self, input: str):
client = OpenAI()
response = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are an information specialist that is really good at deciding what tags a query should have",
},
{"role": "user", "content": DOCTYPE_PROMPT + " " + input},
],
model="gpt-4o",
response_format={
"type": "json_schema",
"json_schema": {
"name": "document_type",
"schema": DocumentType.model_json_schema(),
},
},
)
response_json_str = response.choices[0].message.content
type_data = json.loads(response_json_str)
metadata_query = {"document_type": {"$in": type_data["type"]}}
return metadata_query
def get_query_type(self, input: str, transcript: str):
client = OpenAI()
response = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are an information specialist that is really good at deciding what tags a query should have",
},
{
"role": "user",
"content": f"{QUERY_TYPE_PROMPT}\nTRANSCRIPT:\n{transcript}\nQUERY:{input}",
},
],
model="gpt-4o",
response_format={
"type": "json_schema",
"json_schema": {
"name": "query_type",
"schema": QueryType.model_json_schema(),
},
},
)
response_json_str = response.choices[0].message.content
type_data = json.loads(response_json_str)
return type_data["type"]
def get_query(self, input: str):
client = OpenAI()
response = client.responses.parse(
model="gpt-4o",
input=[
{"role": "system", "content": PROMPT},
{"role": "user", "content": input},
],
text_format=GeneratedQuery,
)
print(response.output)
query = json.loads(response.output_parsed.extracted_metadata_fields)
# response: ChatResponse = ollama_client.chat(
# model="gemma3n:e4b",
# messages=[
# {"role": "system", "content": PROMPT},
# {"role": "user", "content": input},
# ],
# format=GeneratedQuery.model_json_schema(),
# )
# query = json.loads(
# json.loads(response["message"]["content"])["extracted_metadata_fields"]
# )
# date_key = list(query["created_date"].keys())[0]
# query["created_date"][date_key] = self.date_to_epoch(
# query["created_date"][date_key]
# )
# if "$" not in date_key:
# query["created_date"]["$" + date_key] = query["created_date"][date_key]
return query
if __name__ == "__main__":
qg = QueryGenerator()
print(qg.get_doctype_query("How heavy is Simba?"))
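For reference, the shape of the Chroma where filter these helpers produce: get_doctype_query returns the $in clause, and the date prompt is meant to yield clauses like $gt; combining both uses the $and operator listed in PROMPT. The values below are illustrative, not from the original:

# Illustrative combined filter (values made up for the example)
where = {
    "$and": [
        {"document_type": {"$in": ["Medical Record", "Bill"]}},
        {"created_date": {"$gt": 1735689600}},  # epoch seconds for 2025-01-01
    ]
}
# which would then be passed to a Chroma query, e.g.:
# collection.query(query_texts=["vet visits this year"], where=where)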

scripts/test_query.py Normal file (39 additions)

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
"""Test the query_vector_store function."""
import asyncio
import os
from dotenv import load_dotenv
from blueprints.rag.logic import query_vector_store
# Load .env from the repo root (this script lives one level below it, in scripts/)
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
env_path = os.path.join(root_dir, ".env")
load_dotenv(env_path)
async def test_query(query: str):
"""Test a query against the vector store."""
print(f"Query: {query}\n")
result, docs = await query_vector_store(query)
print(f"Found {len(docs)} documents\n")
print("Serialized result:")
print(result)
print("\n" + "=" * 80 + "\n")
async def main():
queries = [
"What is Simba's weight?",
"What medications is Simba taking?",
"Tell me about Simba's recent vet visits",
]
for query in queries:
await test_query(query)
if __name__ == "__main__":
asyncio.run(main())


@@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""
Script to show how many messages each user has written
"""
import asyncio
import os
from tortoise import Tortoise
from blueprints.users.models import User
from blueprints.conversation.models import Speaker
async def get_user_message_stats():
"""Get message count statistics per user"""
# Initialize database connection
database_url = os.getenv("DATABASE_URL", "sqlite://raggr.db")
await Tortoise.init(
db_url=database_url,
modules={
"models": [
"blueprints.users.models",
"blueprints.conversation.models",
]
},
)
print("\n📊 User Message Statistics\n")
print(
f"{'Username':<20} {'Total Messages':<15} {'User Messages':<15} {'Conversations':<15}"
)
print("=" * 70)
# Get all users
users = await User.all()
total_users = 0
total_messages = 0
for user in users:
# Get all conversations for this user
conversations = await user.conversations.all()
if not conversations:
continue
total_users += 1
# Count messages across all conversations
user_message_count = 0
total_message_count = 0
for conversation in conversations:
messages = await conversation.messages.all()
total_message_count += len(messages)
# Count only user messages (not assistant responses)
user_messages = [msg for msg in messages if msg.speaker == Speaker.USER]
user_message_count += len(user_messages)
total_messages += user_message_count
print(
f"{user.username:<20} {total_message_count:<15} {user_message_count:<15} {len(conversations):<15}"
)
print("=" * 70)
print("\n📈 Summary:")
print(f" Total active users: {total_users}")
print(f" Total user messages: {total_messages}")
print(
f" Average messages per user: {total_messages / total_users if total_users > 0 else 0:.1f}\n"
)
await Tortoise.close_connections()
if __name__ == "__main__":
asyncio.run(get_user_message_stats())
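get_user_message_stats issues one query per conversation (an N+1 pattern). Tortoise can prefetch the nested relations in a single pass; a sketch using the same related names as above:

# Hedged sketch: load users, conversations, and messages in one prefetch
users = await User.all().prefetch_related("conversations__messages")
for user in users:
    for conversation in user.conversations:
        # relations are already loaded; no extra queries per conversation
        user_msgs = [m for m in conversation.messages if m.speaker == Speaker.USER]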