Replace ChromaDB with pgvector for vector storage

Consolidate onto PostgreSQL by using pgvector instead of a separate
ChromaDB instance. This removes a Docker volume, a large dependency,
and simplifies the stack without meaningful performance impact at
our document scale.

- Swap langchain-chroma for langchain-postgres (PGVector)
- Use pgvector/pgvector:pg16 Docker image with init script
- Lazy-initialize vector store to avoid eager DB connections
- Add SQL helpers for stats/delete/list (replacing _collection access)
- Remove legacy main.py, chunker, petmd scraper, and /api/query endpoint

Re-index required after deploy (POST /api/rag/index + /index-obsidian).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-24 08:43:52 -04:00
parent 9ed4ca126a
commit 438399646f
19 changed files with 241 additions and 1690 deletions
+1 -35
View File
@@ -3,7 +3,7 @@ import os
from datetime import timedelta
from dotenv import load_dotenv
from quart import Quart, jsonify, render_template, request, send_from_directory
from quart import Quart, jsonify, render_template, send_from_directory
from quart_jwt_extended import JWTManager, get_jwt_identity, jwt_refresh_token_required
from tortoise import Tortoise
@@ -15,7 +15,6 @@ import blueprints.users
import blueprints.whatsapp
import blueprints.users.models
from config.db import TORTOISE_CONFIG
from main import consult_simba_oracle
# Load environment variables
load_dotenv()
@@ -78,39 +77,6 @@ async def serve_react_app(path):
return await render_template("index.html")
@app.route("/api/query", methods=["POST"])
@jwt_refresh_token_required
async def query():
current_user_uuid = get_jwt_identity()
user = await blueprints.users.models.User.get(id=current_user_uuid)
data = await request.get_json()
query = data.get("query")
conversation_id = data.get("conversation_id")
conversation = await blueprints.conversation.logic.get_conversation_by_id(
conversation_id
)
await conversation.fetch_related("messages")
await blueprints.conversation.logic.add_message_to_conversation(
conversation=conversation,
message=query,
speaker="user",
user=user,
)
transcript = await blueprints.conversation.logic.get_conversation_transcript(
user=user, conversation=conversation
)
response = consult_simba_oracle(input=query, transcript=transcript)
await blueprints.conversation.logic.add_message_to_conversation(
conversation=conversation,
message=response,
speaker="simba",
user=user,
)
return jsonify({"response": response})
@app.route("/api/messages", methods=["GET"])
@jwt_refresh_token_required
async def get_messages():