Enable async_mode on PGVector for async method support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Handle missing pgvector tables on first run
2026-04-24 08:53:21 -04:00 · 2026-04-24 08:49:00 -04:00 · 2026-04-24 08:43:52 -04:00 · 2026-04-09 22:09:55 -04:00 · 2026-04-09 22:08:26 -04:00 · 2026-04-09 22:04:33 -04:00
29 changed files with 510 additions and 1785 deletions
@@ -19,11 +19,6 @@ BASE_URL=192.168.1.5:8000
 LLAMA_SERVER_URL=http://192.168.1.213:8080/v1
 LLAMA_MODEL_NAME=llama-3.1-8b-instruct

-# ChromaDB Configuration
-# For Docker: This is automatically set to /app/data/chromadb
-# For local development: Set to a local directory path
-CHROMADB_PATH=./data/chromadb
-
 # OpenAI Configuration
 OPENAI_API_KEY=your-openai-api-key

@@ -13,9 +13,6 @@ wheels/
 .env

 # Database files
-chromadb/
-chromadb_openai/
-chroma_db/
 database/
 *.db

@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co

 ## Project Overview

-SimbaRAG is a RAG (Retrieval-Augmented Generation) conversational AI system for querying information about Simba (a cat). It ingests documents from Paperless-NGX, stores embeddings in ChromaDB, and uses LLMs (Ollama or OpenAI) to answer questions.
+SimbaRAG is a RAG (Retrieval-Augmented Generation) conversational AI system for querying information about Simba (a cat). It ingests documents from Paperless-NGX, stores embeddings in PostgreSQL via pgvector, and uses LLMs (Ollama or OpenAI) to answer questions.

 ## Commands

@@ -54,9 +54,8 @@ docker compose up -d
 │                     Docker Compose                          │
 ├─────────────────────────────────────────────────────────────┤
 │  raggr (port 8080)              │  postgres (port 5432)    │
-│  ├── Quart backend              │  PostgreSQL 16           │
-│  ├── React frontend (served)    │                          │
-│  └── ChromaDB (volume)          │                          │
+│  ├── Quart backend              │  PostgreSQL 16 + pgvector│
+│  └── React frontend (served)    │                          │
 └─────────────────────────────────────────────────────────────┘
 ```

@@ -37,15 +37,14 @@ WORKDIR /app/raggr-frontend
 RUN yarn install && yarn build
 WORKDIR /app

-# Create ChromaDB and database directories
-RUN mkdir -p /app/chromadb /app/database
+# Create database directory
+RUN mkdir -p /app/database

 # Expose port
 EXPOSE 8080

 # Set environment variables
 ENV PYTHONPATH=/app
-ENV CHROMADB_PATH=/app/chromadb

 # Run the startup script
 CMD ["./startup.sh"]
@@ -34,16 +34,15 @@ COPY . .
 WORKDIR /app/raggr-frontend
 RUN yarn build

-# Create ChromaDB and database directories
+# Create database directory
 WORKDIR /app
-RUN mkdir -p /app/chromadb /app/database
+RUN mkdir -p /app/database

 # Make startup script executable
 RUN chmod +x /app/startup-dev.sh

 # Set environment variables
 ENV PYTHONPATH=/app
-ENV CHROMADB_PATH=/app/chromadb
 ENV PYTHONUNBUFFERED=1

 # Expose port
@@ -1,8 +1,11 @@
-.PHONY: deploy build up down restart logs migrate migrate-new frontend test
+.PHONY: deploy redeploy build up down restart logs migrate migrate-new frontend test

 # Build and deploy
 deploy: build up

+# Pull the latest code, then rebuild the image before restarting so the
+# restarted containers actually run the code that was just pulled.
+redeploy:
+	git pull && $(MAKE) down && $(MAKE) build && $(MAKE) up
+
 build:
 	docker compose build raggr

@@ -1,8 +1,9 @@
 import logging
 import os
+from datetime import timedelta

 from dotenv import load_dotenv
-from quart import Quart, jsonify, render_template, request, send_from_directory
+from quart import Quart, jsonify, render_template, send_from_directory
 from quart_jwt_extended import JWTManager, get_jwt_identity, jwt_refresh_token_required
 from tortoise import Tortoise

@@ -14,7 +15,6 @@ import blueprints.users
 import blueprints.whatsapp
 import blueprints.users.models
 from config.db import TORTOISE_CONFIG
-from main import consult_simba_oracle

 # Load environment variables
 load_dotenv()
@@ -38,6 +38,8 @@ app = Quart(
 )

 app.config["JWT_SECRET_KEY"] = os.getenv("JWT_SECRET_KEY", "SECRET_KEY")
+app.config["JWT_ACCESS_TOKEN_EXPIRES"] = timedelta(hours=1)
+app.config["JWT_REFRESH_TOKEN_EXPIRES"] = timedelta(days=30)
 app.config["MAX_CONTENT_LENGTH"] = 10 * 1024 * 1024  # 10 MB upload limit
 jwt = JWTManager(app)

@@ -75,39 +77,6 @@ async def serve_react_app(path):
    return await render_template("index.html")


-@app.route("/api/query", methods=["POST"])
-@jwt_refresh_token_required
-async def query():
-    current_user_uuid = get_jwt_identity()
-    user = await blueprints.users.models.User.get(id=current_user_uuid)
-    data = await request.get_json()
-    query = data.get("query")
-    conversation_id = data.get("conversation_id")
-    conversation = await blueprints.conversation.logic.get_conversation_by_id(
-        conversation_id
-    )
-    await conversation.fetch_related("messages")
-    await blueprints.conversation.logic.add_message_to_conversation(
-        conversation=conversation,
-        message=query,
-        speaker="user",
-        user=user,
-    )
-
-    transcript = await blueprints.conversation.logic.get_conversation_transcript(
-        user=user, conversation=conversation
-    )
-
-    response = consult_simba_oracle(input=query, transcript=transcript)
-    await blueprints.conversation.logic.add_message_to_conversation(
-        conversation=conversation,
-        message=response,
-        speaker="simba",
-        user=user,
-    )
-    return jsonify({"response": response})
-
-
@app.route("/api/messages", methods=["GET"])
@jwt_refresh_token_required
 async def get_messages():
@@ -132,17 +101,10 @@ async def get_messages():
            }
        )

-    name = conversation.name
-    if len(messages) > 8:
-        name = await blueprints.conversation.logic.rename_conversation(
-            user=user,
-            conversation=conversation,
-        )
-
    return jsonify(
        {
            "id": str(conversation.id),
-            "name": name,
+            "name": conversation.name,
            "messages": messages,
            "created_at": conversation.created_at.isoformat(),
            "updated_at": conversation.updated_at.isoformat(),
@@ -1,4 +1,3 @@
-import datetime
 import json
 import logging
 import uuid
@@ -20,8 +19,8 @@ from .agents import main_agent
 from .logic import (
    add_message_to_conversation,
    get_conversation_by_id,
-    rename_conversation,
 )
+from .memory import get_memories_for_user
 from .models import (
    Conversation,
    PydConversation,
@@ -36,15 +35,27 @@ conversation_blueprint = Blueprint(
 _SYSTEM_PROMPT = SIMBA_SYSTEM_PROMPT


+async def _build_system_prompt_with_memories(user_id: str) -> str:
+    """Return the base system prompt, extended with the user's saved memories.
+
+    When the user has no memories the base prompt is returned unchanged.
+    Otherwise each memory is appended as a "- " bullet under a
+    "USER MEMORIES" heading.
+    """
+    remembered = await get_memories_for_user(user_id)
+    if remembered:
+        bullets = "\n".join([f"- {item}" for item in remembered])
+        heading = "USER MEMORIES (facts the user has asked you to remember):"
+        return f"{_SYSTEM_PROMPT}\n\n{heading}\n{bullets}"
+    return _SYSTEM_PROMPT
+
+
 def _build_messages_payload(
-    conversation, query_text: str, image_description: str | None = None
+    conversation,
+    query_text: str,
+    image_description: str | None = None,
+    system_prompt: str | None = None,
 ) -> list:
    recent_messages = (
        conversation.messages[-10:]
        if len(conversation.messages) > 10
        else conversation.messages
    )
-    messages_payload = [{"role": "system", "content": _SYSTEM_PROMPT}]
+    messages_payload = [{"role": "system", "content": system_prompt or _SYSTEM_PROMPT}]
    for msg in recent_messages[:-1]:  # Exclude the message we just added
        role = "user" if msg.speaker == "user" else "assistant"
        text = msg.text
@@ -80,10 +91,14 @@ async def query():
        user=user,
    )

-    messages_payload = _build_messages_payload(conversation, query)
+    system_prompt = await _build_system_prompt_with_memories(str(user.id))
+    messages_payload = _build_messages_payload(
+        conversation, query, system_prompt=system_prompt
+    )
    payload = {"messages": messages_payload}
+    agent_config = {"configurable": {"user_id": str(user.id)}}

-    response = await main_agent.ainvoke(payload)
+    response = await main_agent.ainvoke(payload, config=agent_config)
    message = response.get("messages", [])[-1].content
    await add_message_to_conversation(
        conversation=conversation,
@@ -163,15 +178,19 @@ async def stream_query():
            logging.error(f"Failed to analyze image: {e}")
            image_description = "[Image could not be analyzed]"

+    system_prompt = await _build_system_prompt_with_memories(str(user.id))
    messages_payload = _build_messages_payload(
-        conversation, query_text or "", image_description
+        conversation, query_text or "", image_description, system_prompt=system_prompt
    )
    payload = {"messages": messages_payload}
+    agent_config = {"configurable": {"user_id": str(user.id)}}

    async def event_generator():
        final_message = None
        try:
-            async for event in main_agent.astream_events(payload, version="v2"):
+            async for event in main_agent.astream_events(
+                payload, version="v2", config=agent_config
+            ):
                event_type = event.get("event")

                if event_type == "on_tool_start":
@@ -221,8 +240,6 @@ async def stream_query():
@jwt_refresh_token_required
 async def get_conversation(conversation_id: str):
    conversation = await Conversation.get(id=conversation_id)
-    current_user_uuid = get_jwt_identity()
-    user = await blueprints.users.models.User.get(id=current_user_uuid)
    await conversation.fetch_related("messages")

    # Manually serialize the conversation with messages
@@ -237,18 +254,10 @@ async def get_conversation(conversation_id: str):
                "image_key": msg.image_key,
            }
        )
-    name = conversation.name
-    if len(messages) > 8 and "datetime" in name.lower():
-        name = await rename_conversation(
-            user=user,
-            conversation=conversation,
-        )
-        print(name)
-
    return jsonify(
        {
            "id": str(conversation.id),
-            "name": name,
+            "name": conversation.name,
            "messages": messages,
            "created_at": conversation.created_at.isoformat(),
            "updated_at": conversation.updated_at.isoformat(),
@@ -262,7 +271,7 @@ async def create_conversation():
    user_uuid = get_jwt_identity()
    user = await blueprints.users.models.User.get(id=user_uuid)
    conversation = await Conversation.create(
-        name=f"{user.username} {datetime.datetime.now().timestamp}",
+        name="New Conversation",
        user=user,
    )

@@ -275,7 +284,7 @@ async def create_conversation():
 async def get_all_conversations():
    user_uuid = get_jwt_identity()
    user = await blueprints.users.models.User.get(id=user_uuid)
-    conversations = Conversation.filter(user=user)
+    conversations = Conversation.filter(user=user).order_by("-updated_at")
    serialized_conversations = await PydListConversation.from_queryset(conversations)

    return jsonify(serialized_conversations.model_dump())
@@ -5,9 +5,11 @@ from dotenv import load_dotenv
 from langchain.agents import create_agent
 from langchain.chat_models import BaseChatModel
 from langchain.tools import tool
+from langchain_core.runnables import RunnableConfig
 from langchain_openai import ChatOpenAI
 from tavily import AsyncTavilyClient

+from blueprints.conversation.memory import save_memory
 from blueprints.rag.logic import query_vector_store
 from utils.obsidian_service import ObsidianService
 from utils.ynab_service import YNABService
@@ -326,7 +328,7 @@ async def obsidian_search_notes(query: str) -> str:
        return "Obsidian integration is not configured. Please set OBSIDIAN_VAULT_PATH environment variable."

    try:
-        # Query ChromaDB for obsidian documents
+        # Query the shared vector store (results are not filtered to Obsidian sources)
        serialized, docs = await query_vector_store(query=query)
        return serialized

@@ -589,8 +591,35 @@ async def obsidian_create_task(
        return f"Error creating task: {str(e)}"


+@tool
+async def save_user_memory(content: str, config: RunnableConfig) -> str:
+    """Save a fact or preference about the user for future conversations.
+
+    Use this tool when the user:
+    - Explicitly asks you to remember something ("remember that...", "keep in mind...")
+    - Shares a personal preference that would be useful in future conversations
+      (e.g., "I prefer metric units", "my cat's name is Luna")
+    - Tells you a meaningful personal fact (e.g., "I'm allergic to peanuts")
+
+    Do NOT save:
+    - Trivial or ephemeral info (e.g., "I'm tired today")
+    - Information already in the system prompt or documents
+    - Conversation-specific context that won't matter later
+
+    Args:
+        content: A concise statement of the fact or preference to remember.
+                 Write it as a standalone sentence (e.g., "User prefers dark mode"
+                 rather than "likes dark mode").
+
+    Returns:
+        Confirmation that the memory was saved.
+    """
+    # NOTE: the docstring above is used by @tool as the tool description, so
+    # it is deliberately left untouched; editing it changes agent behavior.
+    #
+    # The owning user is read from config["configurable"]["user_id"]. Look it
+    # up defensively: an agent invocation that omits it would otherwise abort
+    # the tool call with a KeyError instead of telling the model what happened.
+    user_id = (config.get("configurable") or {}).get("user_id")
+    if not user_id:
+        return "Could not save memory: no user is associated with this request."
+    return await save_memory(user_id=user_id, content=content)
+
+
 # Create tools list based on what's available
-tools = [get_current_date, simba_search, web_search]
+tools = [get_current_date, simba_search, web_search, save_user_memory]
 if ynab_enabled:
    tools.extend(
        [
@@ -1,9 +1,8 @@
 import tortoise.exceptions
-from langchain_openai import ChatOpenAI

 import blueprints.users.models

-from .models import Conversation, ConversationMessage, RenameConversationOutputSchema
+from .models import Conversation, ConversationMessage


 async def create_conversation(name: str = "") -> Conversation:
@@ -19,6 +18,12 @@ async def add_message_to_conversation(
    image_key: str | None = None,
 ) -> ConversationMessage:
    print(conversation, message, speaker)
+
+    # Name the conversation after the first user message
+    if speaker == "user" and not await conversation.messages.all().exists():
+        conversation.name = message[:100]
+        await conversation.save()
+
    message = await ConversationMessage.create(
        text=message,
        speaker=speaker,
@@ -61,22 +66,3 @@ async def get_conversation_transcript(
        messages.append(f"{message.speaker} at {message.created_at}: {message.text}")

    return "\n".join(messages)
-
-
-async def rename_conversation(
-    user: blueprints.users.models.User,
-    conversation: Conversation,
-) -> str:
-    messages: str = await get_conversation_transcript(
-        user=user, conversation=conversation
-    )
-
-    llm = ChatOpenAI(model="gpt-4o-mini")
-    structured_llm = llm.with_structured_output(RenameConversationOutputSchema)
-
-    prompt = f"Summarize the following conversation into a sassy one-liner title:\n\n{messages}"
-    response = structured_llm.invoke(prompt)
-    new_name: str = response.get("title", "")
-    conversation.name = new_name
-    await conversation.save()
-    return new_name
@@ -0,0 +1,19 @@
+from .models import UserMemory
+
+
+async def get_memories_for_user(user_id: str) -> list[str]:
+    """Fetch a user's memories as plain strings, most recently updated first."""
+    rows = await UserMemory.filter(user_id=user_id).order_by("-updated_at")
+    contents = []
+    for row in rows:
+        contents.append(row.content)
+    return contents
+
+
+async def save_memory(user_id: str, content: str) -> str:
+    """Save a new memory or touch an existing one (exact-match dedup).
+
+    Args:
+        user_id: Primary key of the user who owns the memory.
+        content: The fact to remember. Surrounding whitespace is stripped
+            before deduplication and storage.
+
+    Returns:
+        A short status message describing what happened.
+    """
+    content = (content or "").strip()
+    if not content:
+        # Nothing worth persisting; avoid creating blank rows that would
+        # later be listed among the user's memories.
+        return "Memory not saved: content was empty."
+
+    existing = await UserMemory.filter(user_id=user_id, content=content).first()
+    if existing:
+        existing.updated_at = None  # auto_now=True will refresh it on save
+        await existing.save(update_fields=["updated_at"])
+        return "Memory already exists (refreshed)."
+
+    await UserMemory.create(user_id=user_id, content=content)
+    return "Memory saved."
@@ -1,5 +1,4 @@
 import enum
-from dataclasses import dataclass

 from tortoise import fields
 from tortoise.contrib.pydantic import (
@@ -9,12 +8,6 @@ from tortoise.contrib.pydantic import (
 from tortoise.models import Model


-@dataclass
-class RenameConversationOutputSchema:
-    title: str
-    justification: str
-
-
 class Speaker(enum.Enum):
    USER = "user"
    SIMBA = "simba"
@@ -47,6 +40,17 @@ class ConversationMessage(Model):
        table = "conversation_messages"


+# ORM model for one remembered piece of text belonging to a single user.
+class UserMemory(Model):
+    # UUID primary key.
+    id = fields.UUIDField(primary_key=True)
+    # Owning user; the reverse relation on the User side is named "memories".
+    user = fields.ForeignKeyField("models.User", related_name="memories")
+    # Free-text body of the memory.
+    content = fields.TextField()
+    # Declared with auto_now_add: timestamp set when the row is created.
+    created_at = fields.DatetimeField(auto_now_add=True)
+    # Declared with auto_now: timestamp refreshed when the row is saved.
+    updated_at = fields.DatetimeField(auto_now=True)
+
+    class Meta:
+        # Explicit database table name.
+        table = "user_memories"
+
+
 PydConversationMessage = pydantic_model_creator(ConversationMessage)
 PydConversation = pydantic_model_creator(
    Conversation, name="Conversation", allow_cycles=True, exclude=("user",)
@@ -54,4 +54,7 @@ You have access to Ryan's daily journal notes. Each note lives at journal/YYYY/Y
 - Use journal_get_tasks to list tasks (done/pending) for today or a specific date
 - Use journal_add_task to add a new task to today's (or a given date's) note
 - Use journal_complete_task to check off a task as done
-Use these tools when Ryan asks about today's tasks, wants to add something to his list, or wants to mark a task complete."""
+Use these tools when Ryan asks about today's tasks, wants to add something to his list, or wants to mark a task complete.
+
+USER MEMORY:
+You can remember facts about the user across conversations using the save_user_memory tool. When a user explicitly asks you to remember something, or shares a meaningful preference or personal fact, save it. Saved memories will automatically appear at the end of this prompt in future conversations under "USER MEMORIES"."""
@@ -1,7 +1,12 @@
 from quart import Blueprint, jsonify
 from quart_jwt_extended import jwt_refresh_token_required

-from .logic import fetch_obsidian_documents, get_vector_store_stats, index_documents, index_obsidian_documents, vector_store
+from .logic import (
+    delete_all_documents,
+    get_vector_store_stats,
+    index_documents,
+    index_obsidian_documents,
+)
 from blueprints.users.decorators import admin_required

 rag_blueprint = Blueprint("rag_api", __name__, url_prefix="/api/rag")
@@ -32,14 +37,7 @@ async def trigger_index():
 async def trigger_reindex():
    """Clear and reindex all documents. Admin only."""
    try:
-        # Clear existing documents
-        collection = vector_store._collection
-        all_docs = collection.get()
-
-        if all_docs["ids"]:
-            collection.delete(ids=all_docs["ids"])
-
-        # Reindex
+        delete_all_documents()
        await index_documents()
        stats = get_vector_store_stats()
        return jsonify({"status": "success", "stats": stats})
@@ -1,11 +1,13 @@
 import datetime
+import logging
 import os

 from dotenv import load_dotenv
-from langchain_chroma import Chroma
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings
+from langchain_postgres import PGVector
 from langchain_text_splitters import RecursiveCharacterTextSplitter
+from sqlalchemy import create_engine, text

 from .fetchers import PaperlessNGXService
 from utils.obsidian_service import ObsidianService
@@ -13,13 +15,40 @@ from utils.obsidian_service import ObsidianService
 # Load environment variables
 load_dotenv()

+logger = logging.getLogger(__name__)
+
 embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

-vector_store = Chroma(
-    collection_name="simba_docs",
-    embedding_function=embeddings,
-    persist_directory=os.getenv("CHROMADB_PATH", ""),
+# Convert Tortoise-style postgres:// URL to SQLAlchemy-style postgresql+psycopg://
+_db_url = os.getenv(
+    "DATABASE_URL", "postgres://raggr:raggr_dev_password@localhost:5432/raggr"
 )
+_pgvector_url = _db_url.replace("postgres://", "postgresql+psycopg://")
+
+# Lazy-initialized vector store (defers DB connection to first use)
+_vector_store = None
+
+
+def _get_vector_store() -> PGVector:
+    """Return the shared PGVector instance, constructing it on first use.
+
+    The instance is cached in the module-level ``_vector_store`` so that
+    later calls reuse it.
+    """
+    global _vector_store
+    if _vector_store is not None:
+        return _vector_store
+
+    _vector_store = PGVector(
+        embeddings=embeddings,
+        collection_name="simba_docs",
+        connection=_pgvector_url,
+        use_jsonb=True,
+        create_extension=False,  # created by docker init script
+        async_mode=True,
+    )
+    return _vector_store
+
+
+def _get_engine():
+    """Return the SQLAlchemy engine used for direct SQL, creating it once.
+
+    The engine is memoised as an attribute on this function object.
+    """
+    cached = getattr(_get_engine, "_engine", None)
+    if cached is None:
+        cached = create_engine(_pgvector_url)
+        _get_engine._engine = cached
+    return cached
+

 text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
@@ -28,6 +57,22 @@ text_splitter = RecursiveCharacterTextSplitter(
 )


+def _get_collection_id():
+    """Look up the UUID of the "simba_docs" collection.
+
+    Returns:
+        The ``uuid`` column of the matching ``langchain_pg_collection`` row,
+        or ``None`` when no such row exists or the lookup fails (for example
+        because the table has not been created yet on a first run).
+    """
+    engine = _get_engine()
+    try:
+        with engine.connect() as conn:
+            result = conn.execute(
+                text("SELECT uuid FROM langchain_pg_collection WHERE name = :name"),
+                {"name": "simba_docs"},
+            )
+            row = result.fetchone()
+    except Exception as exc:
+        # Best-effort lookup: a missing table on first run is expected, so we
+        # still return None. Log the cause so that real failures (bad
+        # credentials, database down) are not silently mistaken for
+        # "no collection yet".
+        logger.warning("Could not look up pgvector collection id: %s", exc)
+        return None
+    return row[0] if row else None
+
+
 def date_to_epoch(date_str: str) -> float:
    split_date = date_str.split("-")
    date = datetime.datetime(
@@ -63,6 +108,7 @@ async def index_documents():
    documents = await fetch_documents_from_paperless_ngx()

    splits = text_splitter.split_documents(documents)
+    vector_store = _get_vector_store()
    await vector_store.aadd_documents(documents=splits)


@@ -92,13 +138,17 @@ async def fetch_obsidian_documents() -> list[Document]:
                    "filepath": parsed["filepath"],
                    "tags": parsed["tags"],
                    "created_at": parsed["metadata"].get("created_at"),
-                    **{k: v for k, v in parsed["metadata"].items() if k not in ["created_at", "created_by"]},
+                    **{
+                        k: v
+                        for k, v in parsed["metadata"].items()
+                        if k not in ["created_at", "created_by"]
+                    },
                },
            )
            documents.append(document)

        except Exception as e:
-            print(f"Error reading {md_path}: {e}")
+            logger.warning(f"Error reading {md_path}: {e}")
            continue

    return documents
@@ -109,26 +159,25 @@ async def index_obsidian_documents():

    Deletes existing obsidian source chunks before re-indexing.
    """
-    obsidian_service = ObsidianService()
    documents = await fetch_obsidian_documents()

    if not documents:
-        print("No Obsidian documents found to index")
+        logger.info("No Obsidian documents found to index")
        return {"indexed": 0}

    # Delete existing obsidian chunks
-    existing_results = vector_store.get(where={"source": "obsidian"})
-    if existing_results.get("ids"):
-        await vector_store.adelete(existing_results["ids"])
+    delete_documents_by_metadata("source", "obsidian")

    # Split and index documents
    splits = text_splitter.split_documents(documents)
+    vector_store = _get_vector_store()
    await vector_store.aadd_documents(documents=splits)

    return {"indexed": len(documents)}


 async def query_vector_store(query: str):
+    vector_store = _get_vector_store()
    retrieved_docs = await vector_store.asimilarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
@@ -137,33 +186,80 @@ async def query_vector_store(query: str):
    return serialized, retrieved_docs


+def delete_all_documents():
+    """Remove every embedding row that belongs to our collection.
+
+    Does nothing when the collection id cannot be resolved.
+    """
+    cid = _get_collection_id()
+    if cid:
+        with _get_engine().connect() as db:
+            purge_stmt = text(
+                "DELETE FROM langchain_pg_embedding WHERE collection_id = :cid"
+            )
+            db.execute(purge_stmt, {"cid": cid})
+            db.commit()
+
+
+def delete_documents_by_metadata(key: str, value: str):
+    """Remove the collection's embedding rows whose metadata ``key`` equals ``value``.
+
+    The comparison is made on the text form of the metadata entry
+    (``cmetadata->>key``). Does nothing when the collection id cannot be
+    resolved.
+    """
+    cid = _get_collection_id()
+    if cid:
+        bind_params = {"cid": cid, "key": key, "value": value}
+        with _get_engine().connect() as db:
+            filtered_delete = text(
+                "DELETE FROM langchain_pg_embedding "
+                "WHERE collection_id = :cid AND cmetadata->>:key = :value"
+            )
+            db.execute(filtered_delete, bind_params)
+            db.commit()
+
+
 def get_vector_store_stats():
    """Get statistics about the vector store."""
-    collection = vector_store._collection
-    count = collection.count()
+    collection_id = _get_collection_id()
+    count = 0
+    if collection_id:
+        engine = _get_engine()
+        with engine.connect() as conn:
+            result = conn.execute(
+                text(
+                    "SELECT COUNT(*) FROM langchain_pg_embedding WHERE collection_id = :cid"
+                ),
+                {"cid": collection_id},
+            )
+            count = result.scalar()
    return {
        "total_documents": count,
-        "collection_name": collection.name,
+        "collection_name": "simba_docs",
    }


 def list_all_documents(limit: int = 10):
    """List documents in the vector store with their metadata."""
-    collection = vector_store._collection
-    results = collection.get(limit=limit, include=["metadatas", "documents"])
+    collection_id = _get_collection_id()
+    if not collection_id:
+        return []

-    documents = []
-    for i, doc_id in enumerate(results["ids"]):
-        documents.append(
-            {
-                "id": doc_id,
-                "metadata": results["metadatas"][i]
-                if results.get("metadatas")
-                else None,
-                "content_preview": results["documents"][i][:200]
-                if results.get("documents")
-                else None,
-            }
+    engine = _get_engine()
+    with engine.connect() as conn:
+        result = conn.execute(
+            text(
+                "SELECT id, document, cmetadata FROM langchain_pg_embedding "
+                "WHERE collection_id = :cid LIMIT :limit"
+            ),
+            {"cid": collection_id, "limit": limit},
        )
+        documents = []
+        for row in result:
+            documents.append(
+                {
+                    "id": str(row[0]),
+                    "metadata": row[2],
+                    "content_preview": row[1][:200] if row[1] else None,
+                }
+            )

    return documents
@@ -35,7 +35,7 @@ class OIDCUserService:
                claims.get("preferred_username") or claims.get("name") or user.username
            )
            # Update LDAP groups from claims
-            user.ldap_groups = claims.get("groups", [])
+            user.ldap_groups = claims.get("groups") or []
            await user.save()
            return user

@@ -48,7 +48,7 @@ class OIDCUserService:
                user.oidc_subject = oidc_subject
                user.auth_provider = "oidc"
                user.password = None  # Clear password
-                user.ldap_groups = claims.get("groups", [])
+                user.ldap_groups = claims.get("groups") or []
                await user.save()
                return user

@@ -61,7 +61,7 @@ class OIDCUserService:
        )

        # Extract LDAP groups from claims
-        groups = claims.get("groups", [])
+        groups = claims.get("groups") or []

        user = await User.create(
            id=uuid4(),
@@ -2,7 +2,7 @@ version: "3.8"

 services:
  postgres:
-    image: postgres:16-alpine
+    image: pgvector/pgvector:pg16
    ports:
      - "5432:5432"
    environment:
@@ -11,6 +11,7 @@ services:
      - POSTGRES_DB=${POSTGRES_DB:-raggr}
    volumes:
      - postgres_data:/var/lib/postgresql/data
+      - ./docker/init-pgvector.sql:/docker-entrypoint-initdb.d/init-pgvector.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-raggr}"]
      interval: 10s
@@ -29,7 +30,6 @@ services:
      - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
      - BASE_URL=${BASE_URL}
      - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434}
-      - CHROMADB_PATH=/app/data/chromadb
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - JWT_SECRET_KEY=${JWT_SECRET_KEY}
      - LLAMA_SERVER_URL=${LLAMA_SERVER_URL}
@@ -66,10 +66,8 @@ services:
      postgres:
        condition: service_healthy
    volumes:
-      - chromadb_data:/app/data/chromadb
      - ./obvault:/app/data/obsidian
    restart: unless-stopped

 volumes:
-  chromadb_data:
  postgres_data:
@@ -0,0 +1 @@
+CREATE EXTENSION IF NOT EXISTS vector;
@@ -1,278 +0,0 @@
-import argparse
-import datetime
-import logging
-import os
-import sqlite3
-import time
-
-from dotenv import load_dotenv
-
-import chromadb
-from utils.chunker import Chunker
-from utils.cleaner import pdf_to_image, summarize_pdf_image
-from llm import LLMClient
-from scripts.query import QueryGenerator
-from utils.request import PaperlessNGXService
-
-_dotenv_loaded = load_dotenv()
-
-client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
-simba_docs = client.get_or_create_collection(name="simba_docs2")
-feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
-
-parser = argparse.ArgumentParser(
-    description="An LLM tool to query information about Simba <3"
-)
-
-parser.add_argument("query", type=str, help="questions about simba's health")
-parser.add_argument(
-    "--reindex", action="store_true", help="re-index the simba documents"
-)
-parser.add_argument("--classify", action="store_true", help="test classification")
-parser.add_argument("--index", help="index a file")
-
-ppngx = PaperlessNGXService()
-
-llm_client = LLMClient()
-
-
-def index_using_pdf_llm(doctypes):
-    logging.info("reindex data...")
-    files = ppngx.get_data()
-    for file in files:
-        document_id: int = file["id"]
-        pdf_path = ppngx.download_pdf_from_id(id=document_id)
-        image_paths = pdf_to_image(filepath=pdf_path)
-        logging.info(f"summarizing {file}")
-        generated_summary = summarize_pdf_image(filepaths=image_paths)
-        file["content"] = generated_summary
-
-    chunk_data(files, simba_docs, doctypes=doctypes)
-
-
-def date_to_epoch(date_str: str) -> float:
-    split_date = date_str.split("-")
-    date = datetime.datetime(
-        int(split_date[0]),
-        int(split_date[1]),
-        int(split_date[2]),
-        0,
-        0,
-        0,
-    )
-
-    return date.timestamp()
-
-
-def chunk_data(docs, collection, doctypes):
-    # Step 2: Create chunks
-    chunker = Chunker(collection)
-
-    logging.info(f"chunking {len(docs)} documents")
-    texts: list[str] = [doc["content"] for doc in docs]
-    with sqlite3.connect("database/visited.db") as conn:
-        to_insert = []
-        c = conn.cursor()
-        for index, text in enumerate(texts):
-            metadata = {
-                "created_date": date_to_epoch(docs[index]["created_date"]),
-                "filename": docs[index]["original_file_name"],
-                "document_type": doctypes.get(docs[index]["document_type"], ""),
-            }
-
-            if doctypes:
-                metadata["type"] = doctypes.get(docs[index]["document_type"])
-
-            chunker.chunk_document(
-                document=text,
-                metadata=metadata,
-            )
-            to_insert.append((docs[index]["id"],))
-
-        c.executemany(
-            "INSERT INTO indexed_documents (paperless_id) values (?)", to_insert
-        )
-        conn.commit()
-
-
-def chunk_text(texts: list[str], collection):
-    chunker = Chunker(collection)
-
-    for index, text in enumerate(texts):
-        metadata = {}
-        chunker.chunk_document(
-            document=text,
-            metadata=metadata,
-        )
-
-
-def classify_query(query: str, transcript: str) -> bool:
-    logging.info("Starting query generation")
-    qg_start = time.time()
-    qg = QueryGenerator()
-    query_type = qg.get_query_type(input=query, transcript=transcript)
-    logging.info(query_type)
-    qg_end = time.time()
-    logging.info(f"Query generation took {qg_end - qg_start:.2f} seconds")
-    return query_type == "Simba"
-
-
-def consult_oracle(
-    input: str,
-    collection,
-    transcript: str = "",
-):
-    chunker = Chunker(collection)
-
-    start_time = time.time()
-
-    # Ask
-    logging.info("Starting query generation")
-    qg_start = time.time()
-    qg = QueryGenerator()
-    doctype_query = qg.get_doctype_query(input=input)
-    # metadata_filter = qg.get_query(input)
-    metadata_filter = {**doctype_query}
-    logging.info(metadata_filter)
-    qg_end = time.time()
-    logging.info(f"Query generation took {qg_end - qg_start:.2f} seconds")
-
-    logging.info("Starting embedding generation")
-    embedding_start = time.time()
-    embeddings = chunker.embedding_fx(inputs=[input])
-    embedding_end = time.time()
-    logging.info(
-        f"Embedding generation took {embedding_end - embedding_start:.2f} seconds"
-    )
-
-    logging.info("Starting collection query")
-    query_start = time.time()
-    results = collection.query(
-        query_texts=[input],
-        query_embeddings=embeddings,
-        where=metadata_filter,
-    )
-    query_end = time.time()
-    logging.info(f"Collection query took {query_end - query_start:.2f} seconds")
-
-    # Generate
-    logging.info("Starting LLM generation")
-    llm_start = time.time()
-    system_prompt = "You are a helpful assistant that understands veterinary terms."
-    transcript_prompt = f"Here is the message transcript thus far {transcript}."
-    prompt = f"""Using the following data, help answer the user's query by providing as many details as possible.
-    Using this data: {results}. {transcript_prompt if len(transcript) > 0 else ""}
-    Respond to this prompt: {input}"""
-    output = llm_client.chat(prompt=prompt, system_prompt=system_prompt)
-    llm_end = time.time()
-    logging.info(f"LLM generation took {llm_end - llm_start:.2f} seconds")
-
-    total_time = time.time() - start_time
-    logging.info(f"Total consult_oracle execution took {total_time:.2f} seconds")
-
-    return output
-
-
-def llm_chat(input: str, transcript: str = "") -> str:
-    system_prompt = "You are a helpful assistant that understands veterinary terms."
-    transcript_prompt = f"Here is the message transcript thus far {transcript}."
-    prompt = f"""Answer the user in as if you were a cat named Simba. Don't act too catlike. Be assertive.
-    {transcript_prompt if len(transcript) > 0 else ""}
-    Respond to this prompt: {input}"""
-    output = llm_client.chat(prompt=prompt, system_prompt=system_prompt)
-    return output
-
-
-def paperless_workflow(input):
-    # Step 1: Get the text
-    ppngx = PaperlessNGXService()
-    docs = ppngx.get_data()
-
-    chunk_data(docs, collection=simba_docs)
-    consult_oracle(input, simba_docs)
-
-
-def consult_simba_oracle(input: str, transcript: str = ""):
-    is_simba_related = classify_query(query=input, transcript=transcript)
-
-    if is_simba_related:
-        logging.info("Query is related to simba")
-        return consult_oracle(
-            input=input,
-            collection=simba_docs,
-            transcript=transcript,
-        )
-
-    logging.info("Query is NOT related to simba")
-
-    return llm_chat(input=input, transcript=transcript)
-
-
-def filter_indexed_files(docs):
-    with sqlite3.connect("database/visited.db") as conn:
-        c = conn.cursor()
-        c.execute(
-            "CREATE TABLE IF NOT EXISTS indexed_documents (id INTEGER PRIMARY KEY AUTOINCREMENT, paperless_id INTEGER)"
-        )
-        c.execute("SELECT paperless_id FROM indexed_documents")
-        rows = c.fetchall()
-        conn.commit()
-
-    visited = {row[0] for row in rows}
-    return [doc for doc in docs if doc["id"] not in visited]
-
-
-def reindex():
-    with sqlite3.connect("database/visited.db") as conn:
-        c = conn.cursor()
-        # Ensure the table exists before trying to delete from it
-        c.execute(
-            "CREATE TABLE IF NOT EXISTS indexed_documents (id INTEGER PRIMARY KEY AUTOINCREMENT, paperless_id INTEGER)"
-        )
-        c.execute("DELETE FROM indexed_documents")
-        conn.commit()
-
-    # Delete all documents from the collection
-    all_docs = simba_docs.get()
-    if all_docs["ids"]:
-        simba_docs.delete(ids=all_docs["ids"])
-
-    logging.info("Fetching documents from Paperless-NGX")
-    ppngx = PaperlessNGXService()
-    docs = ppngx.get_data()
-    docs = filter_indexed_files(docs)
-    logging.info(f"Fetched {len(docs)} documents")
-
-    # Delete all chromadb data
-    ids = simba_docs.get(ids=None, limit=None, offset=0)
-    all_ids = ids["ids"]
-    if len(all_ids) > 0:
-        simba_docs.delete(ids=all_ids)
-
-    # Chunk documents
-    logging.info("Chunking documents now ...")
-    doctype_lookup = ppngx.get_doctypes()
-    chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
-    logging.info("Done chunking documents")
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    if args.reindex:
-        reindex()
-
-    if args.classify:
-        consult_simba_oracle(input="yohohoho testing")
-        consult_simba_oracle(input="write an email")
-        consult_simba_oracle(input="how much does simba weigh")
-
-    if args.query:
-        logging.info("Consulting oracle ...")
-        print(
-            consult_oracle(
-                input=args.query,
-                collection=simba_docs,
-            )
-        )
-    else:
-        logging.info("please provide a query")
@@ -0,0 +1,112 @@
+from tortoise import BaseDBAsyncClient
+
+RUN_IN_TRANSACTION = True
+
+
+async def upgrade(db: BaseDBAsyncClient) -> str:  # Returns DDL creating user_memories, email_accounts, emails, email_sync_status (referenced tables before the tables that reference them).
+    return """
+        CREATE TABLE IF NOT EXISTS "user_memories" (
+    "id" UUID NOT NULL PRIMARY KEY,
+    "content" TEXT NOT NULL,
+    "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "user_id" UUID NOT NULL REFERENCES "users" ("id") ON DELETE CASCADE
+);
+        CREATE TABLE IF NOT EXISTS "email_accounts" (
+    "id" UUID NOT NULL PRIMARY KEY,
+    "email_address" VARCHAR(255) NOT NULL UNIQUE,
+    "display_name" VARCHAR(255),
+    "imap_host" VARCHAR(255) NOT NULL,
+    "imap_port" INT NOT NULL DEFAULT 993,
+    "imap_username" VARCHAR(255) NOT NULL,
+    "imap_password" TEXT NOT NULL,
+    "is_active" BOOL NOT NULL DEFAULT True,
+    "last_error" TEXT,
+    "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "user_id" UUID NOT NULL REFERENCES "users" ("id") ON DELETE CASCADE
+);
+COMMENT ON TABLE "email_accounts" IS 'Email account configuration for IMAP connections.';
+        CREATE TABLE IF NOT EXISTS "emails" (
+    "id" UUID NOT NULL PRIMARY KEY,
+    "message_id" VARCHAR(255) NOT NULL UNIQUE,
+    "subject" VARCHAR(500) NOT NULL,
+    "from_address" VARCHAR(255) NOT NULL,
+    "to_address" TEXT NOT NULL,
+    "date" TIMESTAMPTZ NOT NULL,
+    "body_text" TEXT,
+    "body_html" TEXT,
+    "chromadb_doc_id" VARCHAR(255),
+    "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "expires_at" TIMESTAMPTZ NOT NULL,
+    "account_id" UUID NOT NULL REFERENCES "email_accounts" ("id") ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS "idx_emails_message_981ddd" ON "emails" ("message_id");
+COMMENT ON TABLE "emails" IS 'Email message metadata and content.';
+        CREATE TABLE IF NOT EXISTS "email_sync_status" (
+    "id" UUID NOT NULL PRIMARY KEY,
+    "last_sync_date" TIMESTAMPTZ,
+    "last_message_uid" INT NOT NULL DEFAULT 0,
+    "message_count" INT NOT NULL DEFAULT 0,
+    "consecutive_failures" INT NOT NULL DEFAULT 0,
+    "last_failure_date" TIMESTAMPTZ,
+    "updated_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "account_id" UUID NOT NULL REFERENCES "email_accounts" ("id") ON DELETE CASCADE
+);
+COMMENT ON TABLE "email_sync_status" IS 'Tracks sync progress and state per email account.';"""
+
+
+async def downgrade(db: BaseDBAsyncClient) -> str:  # Returns SQL dropping this migration's tables; emails/email_sync_status go first because their FKs to email_accounts would block its DROP (RESTRICT is the default).
+    return """
+        DROP TABLE IF EXISTS "user_memories";
+        DROP TABLE IF EXISTS "emails";
+        DROP TABLE IF EXISTS "email_sync_status";
+        DROP TABLE IF EXISTS "email_accounts";"""
+
+
+MODELS_STATE = (
+    "eJztXGtv2zYU/SuCPrVAFjTPbcUwwE7czVudDLGz9ZFCoCXa1ixRGkk1NYr+911Skq0HZV"
+    "t+RUr1oU1C8lLU4SV57tGVvuquZ2GHHV955DOmDHHbI/pr7atOkIvhF2X9kaYj31/UigKO"
+    "ho40MBMtZQ0aMk6RyaFyhByGocjCzKS2H12MBI4jCj0TGtpkvCgKiP1fgA3ujTGfYAoVHz"
+    "9BsU0s/AWz+E9/aoxs7FipcduWuLYsN/jMl2X3993rN7KluNzQMD0ncMmitT/jE4/MmweB"
+    "bR0LG1E3xgRTxLGVuA0xyui246JwxFDAaYDnQ7UWBRYeocARYOi/jAJiCgw0eSXx3/mveg"
+    "l4AGoBrU24wOLrt/CuFvcsS3VxqavfW3cvzi5fyrv0GB9TWSkR0b9JQ8RRaCpxXQApf+ag"
+    "vJogqoYybp8BEwa6CYxxwQLHhQ/FQMYAbYaa7qIvhoPJmE/gz9OLiyUw/t26k0hCKwmlB3"
+    "4dev1NVHUa1glIFxCaFItbNhDPA3kNNdx2sRrMtGUGUisyPY5/qSjAcA/WLXFm0SJYgu+g"
+    "2+v0B63eX+JOXMb+cyRErUFH1JzK0lmm9MVlZirmnWj/dAe/a+JP7cPtTSfr+/N2gw+6GB"
+    "MKuGcQ79FAVmK9xqUxMKmJDXxrw4lNWzYT+6QTGw0+Ma8MU6PcCZIw2eIYicZ2wEnc/NAQ"
+    "R+9oqjwzBBh58N54FNtj8ieeSQi7MA5ETNVhEZGO+6ibqoK2KF2MgqLHORtJOgXcHdwT5u"
+    "Hp2epfta47usRwiMzpI6KWUQCmixlDY8zygLYjyzd/3mFnTs3UWCYJXC/ssZq7ShG2Eivv"
+    "1EtglEIvX+WeutkSROC+reja4kpL0FnBghMgrkeGjeRENqS41qSY4y+KI38ApWoo4/Z1Ic"
+    "XLjvLOu0HqFI+p74te693L1En+9vbmt7h5gipfvb1tNwz5ORKpPENmPkZTFRkQAWSHBG6O"
+    "CqRmN2H+xEtHv+937l5r4kR/IP1ur916rTHbHSJ9vSlORZknr9YIMk9eFcaYoiq9gGwXTh"
+    "ZjimdlQvWU0Ub4Hp56pYG8ODldA0loVQilrMtsRslDu9yRqTDd5flZ03DAzIiHW4YFWS2y"
+    "siiujA8U7lI2TtgnKxbxVw+7Hp3pCjKcqF3KgWUQ5IqGdsN9nwH3hYtwTErR34RJw4AbBv"
+    "xdMeBGI34WE1sdjbham2FdROIKs8AtVOJ9s78i3rea8TVMr/5MT8xj2cf/SZu6cL0DpAD4"
+    "iLFHjyo8s20TRGdqMJNWGTCHMx5GU5VTaJaA1xa8N3m6A2Tt7k3r7r2aOsftk37bfj/otD"
+    "LoYhfZThkvnRvsxkVXr/hdOujJq/Xkw2X6YU5AfJwgzmBLN0jgDosEWzWYCtOdiImHRfVs"
+    "HVDPijE9y0EqnczARNyeauF7noMRWeKgSdvs8gfjfW2mZY/qEuv/9vZtav23u9nQ+L7X7o"
+    "DzSpihkR1Soe7NQAnuxEUmcIQpVuiKK1Z/xraGHntyuc42kI2QErvAZdZjPdsyDRYM/8Wm"
+    "IlotBjRrV0Mw93LqQ/w4MXzqfbatcltqzvBwVEp3PBM5W3DRzBOadbbVi+Jt9SK3rToW8o"
+    "0x9QJfkRLzR//2Rg1pxiwD6D2Bu/xo2SY/0hyb8U97g/fjp/3wfHHny1XJrACZIVaig0aV"
+    "fJbiVaNKPtOJnSfG5VShVVmFudc0dpNaWOWINJ9SmFwRySeUm2ORfihaPc9fC4qQHyPT9A"
+    "JhthUgHdFXK+yqZpDsU1yVsOgKdbUTKxPF8qqcnvX0VV12p0WZp/CTI6H2aYhYWvRQ9ljP"
+    "oLSOzQN5IH3uwbal+YgybGlyUJps+GjzCYTTP1hoplEs2sNgjrW3NpkyjXva1YR6LrpuP5"
+    "CRR7XPEDPAD4YRNSeaiXw0tCHsg5UoR9bowDvih1vowJErKJ92Fccwaas6Cm17iQk3CK+3"
+    "jayfXFK/WEuxvFiiWF7kFcsR7CKCGIEfK86oYjSzdvWEdC++CbyyENAlye1eHeE8dIKPiI"
+    "jKxlqxTT2jrJpEVfFtL42Xh541M8q+9ZEyqkl+9aGXhcRowl3F47sVwMZGDbDqhELJsuGK"
+    "MLSSzE1hWhOQm5f5G+VsU0kUf/Ft6G2DiU1b1nNiazKRax3WkXJVMjszbdUkaMaA5DEsna"
+    "NZXxHwKJOrmXaSKqVrpjAuEhYTc7BCX0zJv+vqjJGNkAlH9jigUhvWhMrX7bX+EsUES7mL"
+    "FamOJXpIaJBzK4otITfCGEMVEhOTznxwNC1OpTtK9KExzDlcnh09EKFuxt2AO/OAHWv9wP"
+    "c9ypnmgovZvoPjFkzzMZXvgjYaZUU0yshpy8tBOcNGqYwVC5v5DpoZZVOAs3ZN7JB4S9s3"
+    "JuDYZeBMGdVFXTsUmGJ/zoPZJQXCQcomg6W9P27y889nW0Apv0Xzw+nJ+Y/nP51dnv8ETe"
+    "RQ5iU/LgE3nzkpMdgktT9n2DhjxhkLk/w7MQ8p1rRyPdQF3UMLWzYE2sBHPit8d2lKdcru"
+    "gOnU84O/wtnUDmLcwJR6iizVYpdNW9XkmG9e7G70wiaFspnY5sXu5sXu6r7YnRE2dpGEWS"
+    "8s0zlTM2IaoSq3AyD60Ft/3lmNINm7fJxApkhBToO3SkTOTNxqHXkA1VOmCTvNp57YdJjM"
+    "PBWdYCm74qRQnNeRS/cgdOSegBn+MU1w2tBYnL1g4/pHYSF0ZkJf2JqnxsJOGCnHI+gwoF"
+    "gTdzeFgYg0Vxaqx5oNsR92MfTvhB0LA8matQn86kDzRkWuiIosIxrptJuka+Wtd8DtqhUh"
+    "VYjKrfUoWE5JnIkcqNZJoVaoMj2cZPhqC2q+Y8EwxqDgaXAhgDm77xI90T02AyE8GdExoS"
+    "AxhSAWmX+XWMolGaGw+Q6d7aDZpJ94k26UlOeppDR5WM8sD2vfSQ31z8JqYWqbE10RPUc1"
+    "R8uCZrRoU5kv5xU/S1+TEUcT+KTJMjvhIcVho3j9Xflt8+KH6QmTujzoPcQHc2BplAAxal"
+    "5PAPfyGbfCj3MXfxin+OPcB/sozt4O3Z19FKfENzZ2f7x8+x8fHBMe"
+)
@@ -5,7 +5,8 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
-    "chromadb>=1.1.0",
+    "langchain-postgres>=0.0.13",
+    "psycopg[binary]>=3.1.0",
    "python-dotenv>=1.0.0",
    "flask>=3.1.2",
    "httpx>=0.28.1",
@@ -30,7 +31,6 @@ dependencies = [
    "asyncpg>=0.30.0",
    "langchain-openai>=1.1.6",
    "langchain>=1.2.0",
-    "langchain-chroma>=1.0.0",
    "langchain-community>=0.4.1",
    "jq>=1.10.0",
    "tavily-python>=0.7.17",
@@ -1,4 +1,4 @@
-import { useEffect, useState, useRef } from "react";
+import { useCallback, useEffect, useState, useRef } from "react";
 import { LogOut, Shield, PanelLeftClose, PanelLeftOpen, Menu, X } from "lucide-react";
 import { conversationService } from "../api/conversationService";
 import { userService } from "../api/userService";
@@ -63,9 +63,13 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
  const abortControllerRef = useRef<AbortController | null>(null);
  const simbaAnswers = ["meow.", "hiss...", "purrrrrr", "yowOWROWWowowr"];

-  const scrollToBottom = () => {
-    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
-  };
+  const scrollToBottom = useCallback(() => {
+    requestAnimationFrame(() => {
+      messagesEndRef.current?.scrollIntoView({
+        behavior: isLoading ? "instant" : "smooth",
+      });
+    });
+  }, [isLoading]);

  useEffect(() => {
    isMountedRef.current = true;
@@ -116,21 +120,7 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
    scrollToBottom();
  }, [messages]);

-  useEffect(() => {
-    const load = async () => {
-      if (!selectedConversation) return;
-      try {
-        const conv = await conversationService.getConversation(selectedConversation.id);
-        setSelectedConversation({ id: conv.id, title: conv.name });
-        setMessages(conv.messages.map((m) => ({ text: m.text, speaker: m.speaker, image_key: m.image_key })));
-      } catch (err) {
-        console.error("Failed to load messages:", err);
-      }
-    };
-    load();
-  }, [selectedConversation?.id]);
-
-  const handleQuestionSubmit = async () => {
+  const handleQuestionSubmit = useCallback(async () => {
    if ((!query.trim() && !pendingImage) || isLoading) return;

    let activeConversation = selectedConversation;
@@ -211,22 +201,28 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
        }
      }
    } finally {
-      if (isMountedRef.current) setIsLoading(false);
+      if (isMountedRef.current) {
+        setIsLoading(false);
+        loadConversations();
+      }
      abortControllerRef.current = null;
    }
-  };
+  }, [query, pendingImage, isLoading, selectedConversation, simbaMode, messages, setAuthenticated]);

-  const handleQueryChange = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
+  const handleQueryChange = useCallback((event: React.ChangeEvent<HTMLTextAreaElement>) => {
    setQuery(event.target.value);
-  };
+  }, []);

-  const handleKeyDown = (event: React.ChangeEvent<HTMLTextAreaElement>) => {
+  const handleKeyDown = useCallback((event: React.ChangeEvent<HTMLTextAreaElement>) => {
    const kev = event as unknown as React.KeyboardEvent<HTMLTextAreaElement>;
    if (kev.key === "Enter" && !kev.shiftKey) {
      kev.preventDefault();
      handleQuestionSubmit();
    }
-  };
+  }, [handleQuestionSubmit]);
+
+  const handleImageSelect = useCallback((file: File) => setPendingImage(file), []);
+  const handleClearImage = useCallback(() => setPendingImage(null), []);

  const handleLogout = () => {
    localStorage.removeItem("access_token");
@@ -380,8 +376,8 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
                setSimbaMode={setSimbaMode}
                isLoading={isLoading}
                pendingImage={pendingImage}
-                onImageSelect={(file) => setPendingImage(file)}
-                onClearImage={() => setPendingImage(null)}
+                onImageSelect={handleImageSelect}
+                onClearImage={handleClearImage}
              />
            </div>
          </div>
@@ -416,7 +412,7 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
              </div>
            </div>

-            <footer className="border-t border-sand-light/40 bg-cream/80 backdrop-blur-sm">
+            <footer className="border-t border-sand-light/40 bg-cream">
              <div className="max-w-2xl mx-auto px-4 py-3">
                <MessageInput
                  query={query}
@@ -1,4 +1,4 @@
-import { useRef, useState } from "react";
+import React, { useEffect, useMemo, useRef, useState } from "react";
 import { ArrowUp, ImagePlus, X } from "lucide-react";
 import { cn } from "../lib/utils";
 import { Textarea } from "./ui/textarea";
@@ -15,7 +15,7 @@ type MessageInputProps = {
  onClearImage: () => void;
 };

-export const MessageInput = ({
+export const MessageInput = React.memo(({
  query,
  handleKeyDown,
  handleQueryChange,
@@ -29,6 +29,18 @@ export const MessageInput = ({
  const [simbaMode, setLocalSimbaMode] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

+  // Create blob URL once per file, revoke on cleanup
+  const previewUrl = useMemo(
+    () => (pendingImage ? URL.createObjectURL(pendingImage) : null),
+    [pendingImage],
+  );
+
+  useEffect(() => {
+    return () => {
+      if (previewUrl) URL.revokeObjectURL(previewUrl);
+    };
+  }, [previewUrl]);
+
  const toggleSimbaMode = () => {
    const next = !simbaMode;
    setLocalSimbaMode(next);
@@ -59,7 +71,7 @@ export const MessageInput = ({
        <div className="px-3 pt-3">
          <div className="relative inline-block">
            <img
-              src={URL.createObjectURL(pendingImage)}
+              src={previewUrl!}
              alt="Pending upload"
              className="h-20 rounded-lg object-cover border border-sand"
            />
@@ -145,4 +157,4 @@ export const MessageInput = ({
      </div>
    </div>
  );
-};
+});
@@ -6,19 +6,19 @@ import asyncio
 import sys

 from blueprints.rag.logic import (
+    delete_all_documents,
    get_vector_store_stats,
    index_documents,
    list_all_documents,
-    vector_store,
 )


 def stats():
    """Show vector store statistics."""
-    stats = get_vector_store_stats()
+    s = get_vector_store_stats()
    print("=== Vector Store Statistics ===")
-    print(f"Collection: {stats['collection_name']}")
-    print(f"Total Documents: {stats['total_documents']}")
+    print(f"Collection: {s['collection_name']}")
+    print(f"Total Documents: {s['total_documents']}")


 async def index():
@@ -26,23 +26,15 @@ async def index():
    print("Starting indexing process...")
    print("Fetching documents from Paperless-NGX...")
    await index_documents()
-    print("✓ Indexing complete!")
+    print("Indexing complete!")
    stats()


 async def reindex():
    """Clear and reindex all documents."""
    print("Clearing existing documents...")
-    collection = vector_store._collection
-    all_docs = collection.get()
-
-    if all_docs["ids"]:
-        print(f"Deleting {len(all_docs['ids'])} existing documents...")
-        collection.delete(ids=all_docs["ids"])
-        print("✓ Cleared")
-    else:
-        print("Collection is already empty")
-
+    delete_all_documents()
+    print("Cleared")
    await index()


@@ -113,7 +105,7 @@ Examples:
        print("\n\nOperation cancelled by user")
        sys.exit(1)
    except Exception as e:
-        print(f"\n❌ Error: {e}", file=sys.stderr)
+        print(f"\nError: {e}", file=sys.stderr)
        sys.exit(1)


@@ -1,24 +0,0 @@
-from bs4 import BeautifulSoup
-import chromadb
-import httpx
-
-client = chromadb.PersistentClient(path="/Users/ryanchen/Programs/raggr/chromadb")
-
-# Scrape
-BASE_URL = "https://www.vet.cornell.edu"
-LIST_URL = "/departments-centers-and-institutes/cornell-feline-health-center/health-information/feline-health-topics"
-
-QUERY_URL = BASE_URL + LIST_URL
-r = httpx.get(QUERY_URL)
-soup = BeautifulSoup(r.text)
-
-container = soup.find("div", class_="field-body")
-a_s = container.find_all("a", href=True)
-
-new_texts = []
-
-for link in a_s:
-    endpoint = link["href"]
-    query_url = BASE_URL + endpoint
-    r2 = httpx.get(query_url)
-    article_soup = BeautifulSoup(r2.text)
@@ -1,9 +1,6 @@
 #!/bin/bash
 set -e

-echo "Initializing directories..."
-mkdir -p /app/data/chromadb
-
 echo "Rebuilding frontend..."
 cd /app/raggr-frontend
 yarn build
@@ -1,139 +0,0 @@
-"""Tests for text preprocessing functions in utils/chunker.py."""
-
-from utils.chunker import (
-    remove_headers_footers,
-    remove_special_characters,
-    remove_repeated_substrings,
-    remove_extra_spaces,
-    preprocess_text,
-)
-
-
-class TestRemoveHeadersFooters:
-    def test_removes_default_header(self):
-        text = "Header Line\nActual content here"
-        result = remove_headers_footers(text)
-        assert "Header" not in result
-        assert "Actual content here" in result
-
-    def test_removes_default_footer(self):
-        text = "Actual content\nFooter Line"
-        result = remove_headers_footers(text)
-        assert "Footer" not in result
-        assert "Actual content" in result
-
-    def test_custom_patterns(self):
-        text = "PAGE 1\nContent\nCopyright 2024"
-        result = remove_headers_footers(
-            text,
-            header_patterns=[r"^PAGE \d+$"],
-            footer_patterns=[r"^Copyright.*$"],
-        )
-        assert "PAGE 1" not in result
-        assert "Copyright" not in result
-        assert "Content" in result
-
-    def test_no_match_preserves_text(self):
-        text = "Just normal content"
-        result = remove_headers_footers(text)
-        assert result == "Just normal content"
-
-    def test_empty_string(self):
-        assert remove_headers_footers("") == ""
-
-
-class TestRemoveSpecialCharacters:
-    def test_removes_special_chars(self):
-        text = "Hello @world #test $100"
-        result = remove_special_characters(text)
-        assert "@" not in result
-        assert "#" not in result
-        assert "$" not in result
-
-    def test_preserves_allowed_chars(self):
-        text = "Hello, world! How's it going? Yes-no."
-        result = remove_special_characters(text)
-        assert "," in result
-        assert "!" in result
-        assert "'" in result
-        assert "?" in result
-        assert "-" in result
-        assert "." in result
-
-    def test_custom_pattern(self):
-        text = "keep @this but not #that"
-        result = remove_special_characters(text, special_chars=r"[#]")
-        assert "@this" in result
-        assert "#" not in result
-
-    def test_empty_string(self):
-        assert remove_special_characters("") == ""
-
-
-class TestRemoveRepeatedSubstrings:
-    def test_collapses_dots(self):
-        text = "Item.....Value"
-        result = remove_repeated_substrings(text)
-        assert result == "Item.Value"
-
-    def test_single_dot_preserved(self):
-        text = "End of sentence."
-        result = remove_repeated_substrings(text)
-        assert result == "End of sentence."
-
-    def test_custom_pattern(self):
-        text = "hello---world"
-        result = remove_repeated_substrings(text, pattern=r"-{2,}")
-        # Function always replaces matched pattern with "."
-        assert result == "hello.world"
-
-    def test_empty_string(self):
-        assert remove_repeated_substrings("") == ""
-
-
-class TestRemoveExtraSpaces:
-    def test_collapses_multiple_blank_lines(self):
-        text = "Line 1\n\n\n\nLine 2"
-        result = remove_extra_spaces(text)
-        # After collapsing newlines to \n\n, then \s+ collapses everything to single spaces
-        assert "\n\n\n" not in result
-
-    def test_collapses_multiple_spaces(self):
-        text = "Hello    world"
-        result = remove_extra_spaces(text)
-        assert result == "Hello world"
-
-    def test_strips_whitespace(self):
-        text = "  Hello world  "
-        result = remove_extra_spaces(text)
-        assert result == "Hello world"
-
-    def test_empty_string(self):
-        assert remove_extra_spaces("") == ""
-
-
-class TestPreprocessText:
-    def test_full_pipeline(self):
-        text = "Header Info\nHello @world...  with   spaces\nFooter Info"
-        result = preprocess_text(text)
-        assert "Header" not in result
-        assert "Footer" not in result
-        assert "@" not in result
-        assert "..." not in result
-        assert "   " not in result
-
-    def test_preserves_meaningful_content(self):
-        text = "The cat weighs 10 pounds."
-        result = preprocess_text(text)
-        assert "cat" in result
-        assert "10" in result
-        assert "pounds" in result
-
-    def test_empty_string(self):
-        assert preprocess_text("") == ""
-
-    def test_already_clean(self):
-        text = "Simple clean text here."
-        result = preprocess_text(text)
-        assert "Simple" in result
-        assert "clean" in result
@@ -1,137 +0,0 @@
-import os
-from math import ceil
-import re
-from typing import Union
-from uuid import UUID, uuid4
-from chromadb.utils.embedding_functions.openai_embedding_function import (
-    OpenAIEmbeddingFunction,
-)
-from dotenv import load_dotenv
-from llm import LLMClient
-
-
-load_dotenv()
-
-
-def remove_headers_footers(text, header_patterns=None, footer_patterns=None):
-    if header_patterns is None:
-        header_patterns = [r"^.*Header.*$"]
-    if footer_patterns is None:
-        footer_patterns = [r"^.*Footer.*$"]
-
-    for pattern in header_patterns + footer_patterns:
-        text = re.sub(pattern, "", text, flags=re.MULTILINE)
-
-    return text.strip()
-
-
-def remove_special_characters(text, special_chars=None):
-    if special_chars is None:
-        special_chars = r"[^A-Za-z0-9\s\.,;:\'\"\?\!\-]"
-
-    text = re.sub(special_chars, "", text)
-    return text.strip()
-
-
-def remove_repeated_substrings(text, pattern=r"\.{2,}"):
-    text = re.sub(pattern, ".", text)
-    return text.strip()
-
-
-def remove_extra_spaces(text):
-    text = re.sub(r"\n\s*\n", "\n\n", text)
-    text = re.sub(r"\s+", " ", text)
-
-    return text.strip()
-
-
-def preprocess_text(text):
-    # Remove headers and footers
-    text = remove_headers_footers(text)
-
-    # Remove special characters
-    text = remove_special_characters(text)
-
-    # Remove repeated substrings like dots
-    text = remove_repeated_substrings(text)
-
-    # Remove extra spaces between lines and within lines
-    text = remove_extra_spaces(text)
-
-    # Additional cleaning steps can be added here
-
-    return text.strip()
-
-
-class Chunk:
-    def __init__(
-        self,
-        text: str,
-        size: int,
-        document_id: UUID,
-        chunk_id: int,
-        embedding,
-    ):
-        self.text = text
-        self.size = size
-        self.document_id = document_id
-        self.chunk_id = chunk_id
-        self.embedding = embedding
-
-
-class Chunker:
-    def __init__(self, collection) -> None:
-        self.collection = collection
-        self.llm_client = LLMClient()
-
-    def embedding_fx(self, inputs):
-        openai_embedding_fx = OpenAIEmbeddingFunction(
-            api_key=os.getenv("OPENAI_API_KEY"),
-            model_name="text-embedding-3-small",
-        )
-        return openai_embedding_fx(inputs)
-
-    def chunk_document(
-        self,
-        document: str,
-        chunk_size: int = 1000,
-        metadata: dict[str, Union[str, float]] = {},
-    ) -> list[Chunk]:
-        doc_uuid = uuid4()
-
-        chunk_size = min(chunk_size, len(document)) or 1
-
-        chunks = []
-        num_chunks = ceil(len(document) / chunk_size)
-        document_length = len(document)
-
-        for i in range(num_chunks):
-            curr_pos = i * num_chunks
-            to_pos = (
-                curr_pos + chunk_size
-                if curr_pos + chunk_size < document_length
-                else document_length
-            )
-            text_chunk = self.clean_document(document[curr_pos:to_pos])
-
-            embedding = self.embedding_fx([text_chunk])
-            self.collection.add(
-                ids=[str(doc_uuid) + ":" + str(i)],
-                documents=[text_chunk],
-                embeddings=embedding,
-                metadatas=[metadata],
-            )
-
-        return chunks
-
-    def clean_document(self, document: str) -> str:
-        """This function will remove information that is noise or already known.
-
-        Example: We already know all the things in here are Simba-related, so we don't need things like
-        "Sumamry of simba's visit"
-        """
-
-        document = document.replace("\\n", "")
-        document = document.strip()
-
-        return preprocess_text(document)
Author	SHA1	Message	Date
Ryan Chen	564a9b68a5	Enable async_mode on PGVector for async method support Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-24 08:53:21 -04:00
Ryan Chen	c157c37cde	Handle missing pgvector tables on first run _get_collection_id now catches the UndefinedTable error that occurs before the first index operation creates the langchain tables. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-24 08:49:00 -04:00
Ryan Chen	438399646f	Replace ChromaDB with pgvector for vector storage Consolidate onto PostgreSQL by using pgvector instead of a separate ChromaDB instance. This removes a Docker volume, a large dependency, and simplifies the stack without meaningful performance impact at our document scale. - Swap langchain-chroma for langchain-postgres (PGVector) - Use pgvector/pgvector:pg16 Docker image with init script - Lazy-initialize vector store to avoid eager DB connections - Add SQL helpers for stats/delete/list (replacing _collection access) - Remove legacy main.py, chunker, petmd scraper, and /api/query endpoint Re-index required after deploy (POST /api/rag/index + /index-obsidian). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-24 08:43:52 -04:00
ryan	9ed4ca126a	Merge pull request 'Fix mobile conversation launch resetting to homepage' (#27 ) from fix/mobile-conversation-launch into main Reviewed-on: #27	2026-04-09 22:09:55 -04:00
Ryan Chen	f3ae76ce68	Fix mobile conversation launch resetting to homepage Remove the useEffect on selectedConversation.id that race-conditions with handleQuestionSubmit — it fetches the (still-empty) conversation and wipes messages, sending the user back to the empty state. Refresh conversation list after streaming completes instead to pick up the auto-generated title. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-09 22:08:26 -04:00
ryan	7ee3bdef84	Merge pull request 'Simplify conversation naming to first message truncation' (#26 ) from feat/conversation-name-truncation into main Reviewed-on: #26	2026-04-09 22:04:33 -04:00
Ryan Chen	500c44feb1	Simplify conversation naming to truncate first message Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-09 22:02:10 -04:00
ryan	896501deb1	Merge pull request 'Add user memory for cross-conversation recall' (#25 ) from feat/user-memory into main Reviewed-on: #25	2026-04-09 21:54:04 -04:00
Ryan Chen	c95800e65d	Add user memory feature for cross-conversation recall Give the LangChain agent a save_user_memory tool so users can ask it to remember preferences and personal facts. Memories are stored per-user in a new user_memories table and injected into the system prompt on each conversation turn. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-09 21:53:14 -04:00
ryan	90372a6a6d	Merge pull request 'Order conversations by recency and auto-name from first message' (#24 ) from feat/conversation-ordering-and-naming into main Reviewed-on: #24	2026-04-05 10:43:09 -04:00
Ryan Chen	c01764243f	Order conversations by recency and auto-name from first message Conversations are now returned sorted by most recently updated first. New conversations are named using the first 100 characters of the user's initial message instead of a username+timestamp placeholder. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-05 10:34:48 -04:00
ryan	dfaac4caf8	Merge pull request 'Extend JWT token expiration times' (#23 ) from extend-jwt-expiration into main Reviewed-on: #23	2026-04-05 10:13:29 -04:00
ryan	17c3a2f888	Merge pull request 'Add redeploy Makefile target' (#20 ) from feat/makefile-redeploy into main Reviewed-on: #20	2026-04-05 10:13:01 -04:00
ryan	fa0f68e3b4	Merge pull request 'Fix OIDC login crash when groups claim is null' (#22 ) from fix/oidc-null-groups into main Reviewed-on: #22	2026-04-05 10:12:55 -04:00
Ryan Chen	a6c698c6bd	Fix OIDC login crash when groups claim is null Use `claims.get("groups") or []` instead of `claims.get("groups", [])` so that an explicit `null` value is coerced to an empty list, preventing a ValueError on the non-nullable ldap_groups field. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-05 10:12:12 -04:00
Ryan Chen	07c272c96a	Extend JWT token expiration times Access tokens now last 1 hour (up from default 15 min) and refresh tokens last 30 days, reducing frequent re-authentication. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-05 10:10:47 -04:00
ryan	975a337af4	Merge pull request 'Fix mobile performance degradation during typing and after image upload' (#21 ) from fix/mobile-input-performance into main Reviewed-on: #21	2026-04-05 06:59:39 -04:00
Ryan Chen	e644def141	Fix mobile performance degradation during typing and after image upload Memoize blob URL creation to prevent leak on every keystroke, wrap MessageInput in React.memo with stable useCallback props, remove expensive backdrop-blur-sm from chat footer, and use instant scroll during streaming to avoid queuing smooth scroll animations. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-05 06:58:53 -04:00
Ryan Chen	3671926430	Add redeploy Makefile target for quick pull-and-restart Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-04 09:10:10 -04:00