Compare commits
44 commits: quart-logi...713a058c4f

Commits: 713a058c4f, 12f7d9ead1, ad39904dda, 1fd2e860b2, 7cfad5baba, f68a79bdb7,
52153cdf1e, 6eb3775e0f, b3793d2d32, 033429798e, 733ffae8cf, 0895668ddd, 07512409f1,
12eb110313, 1a026f76a1, da3a464897, 913875188a, f5e2d68cd2, 70799ffb7d, 7f1d4fbdda,
5ebdd60ea0, 289045e7d0, ceea83cb54, 1b60aab97c, 210bfc1476, 454fb1b52c, c3f2501585,
1da21fabee, dd5690ee53, 5e7ac28b6f, 29f8894e4a, 19d1df2f68, e577cb335b, 591788dfa4,
561b5bddce, ddd455a4c6, 07424e77e0, a56f752917, e8264e80ce, 04350045d3, f16e13fccc,
245db92524, 7161c09a4e, 68d73b62e8

(deleted file)
@@ -1,16 +0,0 @@
.git
.gitignore
README.md
.env
.DS_Store
chromadb/
chroma_db/
raggr-frontend/node_modules/
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
.venv/
venv/
.pytest_cache/

.env.example (new file, 46 lines)
@@ -0,0 +1,46 @@
# Database Configuration
# PostgreSQL is recommended (required for OIDC features)
DATABASE_URL=postgres://raggr:changeme@postgres:5432/raggr

# PostgreSQL credentials (if using docker-compose postgres service)
POSTGRES_USER=raggr
POSTGRES_PASSWORD=changeme
POSTGRES_DB=raggr

# JWT Configuration
JWT_SECRET_KEY=your-secret-key-here

# Paperless Configuration
PAPERLESS_TOKEN=your-paperless-token
BASE_URL=192.168.1.5:8000

# Ollama Configuration
OLLAMA_URL=http://192.168.1.14:11434
OLLAMA_HOST=http://192.168.1.14:11434

# ChromaDB Configuration
# For Docker: This is automatically set to /app/data/chromadb
# For local development: Set to a local directory path
CHROMADB_PATH=./data/chromadb

# OpenAI Configuration
OPENAI_API_KEY=your-openai-api-key

# Immich Configuration
IMMICH_URL=http://192.168.1.5:2283
IMMICH_API_KEY=your-immich-api-key
SEARCH_QUERY=simba cat
DOWNLOAD_DIR=./simba_photos

# OIDC Configuration (Authelia)
OIDC_ISSUER=https://auth.example.com
OIDC_CLIENT_ID=simbarag
OIDC_CLIENT_SECRET=your-client-secret-here
OIDC_REDIRECT_URI=http://localhost:8080/
OIDC_USE_DISCOVERY=true

# Optional: Manual OIDC endpoints (if discovery is disabled)
# OIDC_AUTHORIZATION_ENDPOINT=https://auth.example.com/api/oidc/authorization
# OIDC_TOKEN_ENDPOINT=https://auth.example.com/api/oidc/token
# OIDC_USERINFO_ENDPOINT=https://auth.example.com/api/oidc/userinfo
# OIDC_JWKS_URI=https://auth.example.com/api/oidc/jwks

.gitignore (vendored)
@@ -9,5 +9,12 @@ wheels/
 # Virtual environments
 .venv
 
+# Environment files
 .env
 
+# Database files
+chromadb/
+chromadb_openai/
+chroma_db/
+database/
+*.db

.pre-commit-config.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.8.2
    hooks:
      - id: ruff # Linter
      - id: ruff-format # Formatter

(deleted file)
@@ -1 +0,0 @@
3.13

CLAUDE.md (new file, 109 lines)
@@ -0,0 +1,109 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

SimbaRAG is a RAG (Retrieval-Augmented Generation) conversational AI system for querying information about Simba (a cat). It ingests documents from Paperless-NGX, stores embeddings in ChromaDB, and uses LLMs (Ollama or OpenAI) to answer questions.

## Commands

### Development

```bash
# Start dev environment with hot reload
docker compose -f docker-compose.dev.yml up --build

# View logs
docker compose -f docker-compose.dev.yml logs -f raggr
```

### Database Migrations (Aerich/Tortoise ORM)

```bash
# Generate migration (must run in Docker with DB access)
docker compose -f docker-compose.dev.yml exec raggr aerich migrate --name describe_change

# Apply migrations (auto-runs on startup, manual if needed)
docker compose -f docker-compose.dev.yml exec raggr aerich upgrade

# View migration history
docker compose exec raggr aerich history
```

### Frontend

```bash
cd raggr-frontend
yarn install
yarn build  # Production build
yarn dev    # Dev server (rarely needed, backend serves frontend)
```

### Production

```bash
docker compose build raggr
docker compose up -d
```

## Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                       Docker Compose                         │
├─────────────────────────────────────────────────────────────┤
│  raggr (port 8080)            │  postgres (port 5432)        │
│  ├── Quart backend            │  PostgreSQL 16               │
│  ├── React frontend (served)  │                              │
│  └── ChromaDB (volume)        │                              │
└─────────────────────────────────────────────────────────────┘
```

**Backend** (root directory):
- `app.py` - Quart application entry, serves API and static frontend
- `main.py` - RAG logic, document indexing, LLM interaction, LangChain agent
- `llm.py` - LLM client with Ollama primary, OpenAI fallback
- `aerich_config.py` - Database migration configuration
- `blueprints/` - API routes organized as Quart blueprints
  - `users/` - OIDC auth, JWT tokens, RBAC with LDAP groups
  - `conversation/` - Chat conversations and message history
  - `rag/` - Document indexing endpoints (admin-only)
- `config/` - Configuration modules
  - `oidc_config.py` - OIDC authentication configuration
- `utils/` - Reusable utilities
  - `chunker.py` - Document chunking for embeddings
  - `cleaner.py` - PDF cleaning and summarization
  - `image_process.py` - Image description with LLM
  - `request.py` - Paperless-NGX API client
- `scripts/` - Administrative and utility scripts
  - `add_user.py` - Create users manually
  - `user_message_stats.py` - User message statistics
  - `manage_vectorstore.py` - Vector store management CLI
  - `inspect_vector_store.py` - Inspect ChromaDB contents
  - `query.py` - Query generation utilities
- `migrations/` - Database migration files

**Frontend** (`raggr-frontend/`):
- React 19 with Rsbuild bundler
- Tailwind CSS for styling
- Built to `dist/`, served by backend at `/`

**Auth Flow**: LLDAP → Authelia (OIDC) → Backend JWT → Frontend localStorage

## Key Patterns

- All endpoints are async (`async def`)
- Use `@jwt_refresh_token_required` for authenticated endpoints
- Use `@admin_required` for admin-only endpoints (checks `lldap_admin` group)
- Tortoise ORM models in `blueprints/*/models.py`
- Frontend API services in `raggr-frontend/src/api/`
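
A minimal sketch of how these patterns combine in a blueprint route (the `example` blueprint and its handlers are hypothetical; the decorators and `User` model are the real ones used elsewhere in this repo):

```python
# Hypothetical blueprint illustrating the patterns above; not a real module in this repo.
from quart import Blueprint, jsonify
from quart_jwt_extended import get_jwt_identity, jwt_refresh_token_required

from blueprints.users.decorators import admin_required
from blueprints.users.models import User

example_blueprint = Blueprint("example_api", __name__, url_prefix="/api/example")


@example_blueprint.get("/")
@jwt_refresh_token_required  # authenticated endpoint: requires a valid JWT
async def whoami():
    # get_jwt_identity() returns the user's UUID stored in the token
    user = await User.get(id=get_jwt_identity())
    return jsonify({"username": user.username})


@example_blueprint.post("/reset")
@admin_required  # admin-only: checks membership in the lldap_admin group
async def reset():
    return jsonify({"status": "ok"})
```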

## Environment Variables

See `.env.example`. Key ones:
- `DATABASE_URL` - PostgreSQL connection
- `OIDC_*` - Authelia OIDC configuration
- `OLLAMA_URL` - Local LLM server
- `OPENAI_API_KEY` - Fallback LLM
- `PAPERLESS_TOKEN` / `BASE_URL` - Document source

Dockerfile
@@ -24,8 +24,10 @@ RUN uv pip install --system -e .
 # Copy application code
 COPY *.py ./
 COPY blueprints ./blueprints
-COPY aerich.toml ./
 COPY migrations ./migrations
+COPY utils ./utils
+COPY config ./config
+COPY scripts ./scripts
 COPY startup.sh ./
 RUN chmod +x startup.sh

@@ -35,8 +37,8 @@ WORKDIR /app/raggr-frontend
 RUN yarn install && yarn build
 WORKDIR /app
 
-# Create ChromaDB directory
-RUN mkdir -p /app/chromadb
+# Create ChromaDB and database directories
+RUN mkdir -p /app/chromadb /app/database
 
 # Expose port
 EXPOSE 8080

Dockerfile.dev (new file, 53 lines)
@@ -0,0 +1,53 @@
FROM python:3.13-slim

WORKDIR /app

# Install system dependencies, Node.js, uv, and yarn
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get install -y nodejs \
    && npm install -g yarn \
    && rm -rf /var/lib/apt/lists/* \
    && curl -LsSf https://astral.sh/uv/install.sh | sh

# Add uv to PATH
ENV PATH="/root/.local/bin:$PATH"

# Copy dependency files
COPY pyproject.toml ./

# Install Python dependencies using uv
RUN uv pip install --system -e .

# Copy frontend package files and install dependencies
COPY raggr-frontend/package.json raggr-frontend/yarn.lock* raggr-frontend/
WORKDIR /app/raggr-frontend
RUN yarn install

# Copy application source code
WORKDIR /app
COPY . .

# Build frontend
WORKDIR /app/raggr-frontend
RUN yarn build

# Create ChromaDB and database directories
WORKDIR /app
RUN mkdir -p /app/chromadb /app/database

# Make startup script executable
RUN chmod +x /app/startup-dev.sh

# Set environment variables
ENV PYTHONPATH=/app
ENV CHROMADB_PATH=/app/chromadb
ENV PYTHONUNBUFFERED=1

# Expose port
EXPOSE 8080

# Default command
CMD ["/app/startup-dev.sh"]

README.md
@@ -1,7 +1,370 @@
-# simbarag
-
-**Goal:** Learn how retrieval-augmented generation works and also create a neat little tool to ask about Simba's health.
-
-**Current objectives:**
-
-- [ ] Successfully use RAG to ask a question about existing information (e.g. how many teeth has Simba had extracted)

# SimbaRAG 🐱

A Retrieval-Augmented Generation (RAG) conversational AI system for querying information about Simba the cat. Built with LangChain, ChromaDB, and modern web technologies.

## Features

- 🤖 **Intelligent Conversations** - LangChain-powered agent with tool use and memory
- 📚 **Document Retrieval** - RAG system using ChromaDB vector store
- 🔍 **Web Search** - Integrated Tavily API for real-time web searches
- 🔐 **OIDC Authentication** - Secure auth via Authelia with LDAP group support
- 💬 **Multi-Conversation** - Manage multiple conversation threads per user
- 🎨 **Modern UI** - React 19 frontend with Tailwind CSS
- 🐳 **Docker Ready** - Containerized deployment with Docker Compose

## System Architecture

```mermaid
graph TB
    subgraph "Client Layer"
        Browser[Web Browser]
    end

    subgraph "Frontend - React"
        UI[React UI<br/>Tailwind CSS]
        Auth[Auth Service]
        API[API Client]
    end

    subgraph "Backend - Quart/Python"
        App[Quart App<br/>app.py]

        subgraph "Blueprints"
            Users[Users Blueprint<br/>OIDC + JWT]
            Conv[Conversation Blueprint<br/>Chat Management]
            RAG[RAG Blueprint<br/>Document Indexing]
        end

        Agent[LangChain Agent<br/>main.py]
        LLM[LLM Client<br/>llm.py]
    end

    subgraph "Tools & Utilities"
        Search[Simba Search Tool]
        Web[Web Search Tool<br/>Tavily]
    end

    subgraph "Data Layer"
        Postgres[(PostgreSQL<br/>Users & Conversations)]
        Chroma[(ChromaDB<br/>Vector Store)]
    end

    subgraph "External Services"
        Authelia[Authelia<br/>OIDC Provider]
        LLDAP[LLDAP<br/>User Directory]
        Ollama[Ollama<br/>Local LLM]
        OpenAI[OpenAI<br/>Fallback LLM]
        Paperless[Paperless-NGX<br/>Documents]
        TavilyAPI[Tavily API<br/>Web Search]
    end

    Browser --> UI
    UI --> Auth
    UI --> API
    API --> App

    App --> Users
    App --> Conv
    App --> RAG

    Conv --> Agent
    Agent --> Search
    Agent --> Web
    Agent --> LLM

    Search --> Chroma
    Web --> TavilyAPI
    RAG --> Chroma
    RAG --> Paperless

    Users --> Postgres
    Conv --> Postgres

    Users --> Authelia
    Authelia --> LLDAP

    LLM --> Ollama
    LLM -.Fallback.-> OpenAI

    style Browser fill:#e1f5ff
    style UI fill:#fff3cd
    style App fill:#d4edda
    style Agent fill:#d4edda
    style Postgres fill:#f8d7da
    style Chroma fill:#f8d7da
    style Ollama fill:#e2e3e5
    style OpenAI fill:#e2e3e5
```

## Quick Start

### Prerequisites

- Docker & Docker Compose
- PostgreSQL (or use Docker)
- Ollama (optional, for local LLM)
- Paperless-NGX instance (for document source)

### Installation

1. **Clone the repository**

   ```bash
   git clone https://github.com/yourusername/simbarag.git
   cd simbarag
   ```

2. **Configure environment variables**

   ```bash
   cp .env.example .env
   # Edit .env with your configuration
   ```

3. **Start the services**

   ```bash
   # Development (local PostgreSQL only)
   docker compose -f docker-compose.dev.yml up -d

   # Or full Docker deployment
   docker compose up -d
   ```

4. **Access the application**

   Open `http://localhost:8080` in your browser.

## Development

### Local Development Setup

```bash
# 1. Start PostgreSQL
docker compose -f docker-compose.dev.yml up -d

# 2. Set environment variables
export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr"
export CHROMADB_PATH="./chromadb"
export $(grep -v '^#' .env | xargs)

# 3. Install dependencies
pip install -r requirements.txt
cd raggr-frontend && yarn install && yarn build && cd ..

# 4. Run migrations
aerich upgrade

# 5. Start the server
python app.py
```

See [docs/development.md](docs/development.md) for a detailed development guide.

## Project Structure

```
simbarag/
├── app.py                    # Quart application entry point
├── main.py                   # RAG logic & LangChain agent
├── llm.py                    # LLM client with Ollama/OpenAI
├── aerich_config.py          # Database migration configuration
│
├── blueprints/               # API route blueprints
│   ├── users/                # Authentication & authorization
│   ├── conversation/         # Chat conversations
│   └── rag/                  # Document indexing
│
├── config/                   # Configuration modules
│   └── oidc_config.py        # OIDC authentication settings
│
├── utils/                    # Reusable utilities
│   ├── chunker.py            # Document chunking for embeddings
│   ├── cleaner.py            # PDF cleaning and summarization
│   ├── image_process.py      # Image description with LLM
│   └── request.py            # Paperless-NGX API client
│
├── scripts/                  # Administrative scripts
│   ├── add_user.py
│   ├── user_message_stats.py
│   ├── manage_vectorstore.py
│   └── inspect_vector_store.py
│
├── raggr-frontend/           # React frontend
│   └── src/
│
├── migrations/               # Database migrations
│
├── docs/                     # Documentation
│   ├── index.md              # Documentation hub
│   ├── development.md        # Development guide
│   ├── deployment.md         # Deployment & migrations
│   ├── VECTORSTORE.md        # Vector store management
│   ├── MIGRATIONS.md         # Migration reference
│   └── authentication.md     # Authentication setup
│
├── docker-compose.yml        # Production compose
├── docker-compose.dev.yml    # Development compose
├── Dockerfile                # Production Dockerfile
├── Dockerfile.dev            # Development Dockerfile
├── CLAUDE.md                 # AI assistant instructions
└── README.md                 # This file
```

## Key Technologies

### Backend
- **Quart** - Async Python web framework
- **LangChain** - Agent framework with tool use
- **Tortoise ORM** - Async ORM for PostgreSQL
- **Aerich** - Database migration tool
- **ChromaDB** - Vector database for embeddings
- **OpenAI** - Embeddings & LLM (fallback)
- **Ollama** - Local LLM (primary)

### Frontend
- **React 19** - UI framework
- **Rsbuild** - Fast bundler
- **Tailwind CSS** - Utility-first styling
- **Axios** - HTTP client

### Authentication
- **Authelia** - OIDC provider
- **LLDAP** - Lightweight LDAP server
- **JWT** - Token-based auth

## API Endpoints

### Authentication
- `GET /api/user/oidc/login` - Initiate OIDC login
- `GET /api/user/oidc/callback` - OIDC callback handler
- `POST /api/user/refresh` - Refresh JWT token

### Conversations
- `POST /api/conversation/` - Create conversation
- `GET /api/conversation/` - List conversations
- `GET /api/conversation/<id>` - Get conversation with messages
- `POST /api/conversation/query` - Send message and get response

### RAG (Admin Only)
- `GET /api/rag/stats` - Vector store statistics
- `POST /api/rag/index` - Index new documents
- `POST /api/rag/reindex` - Clear and reindex all
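
For illustration, a minimal client-side sketch of the conversation flow with `httpx` (it assumes you already hold a token from the OIDC login flow; `TOKEN` is a placeholder):

```python
import asyncio

import httpx

TOKEN = "eyJ..."  # placeholder: obtained via /api/user/oidc/login + callback
HEADERS = {"Authorization": f"Bearer {TOKEN}"}


async def ask_simba(question: str) -> str:
    async with httpx.AsyncClient(base_url="http://localhost:8080") as client:
        # Create a conversation thread, then send a message into it
        conv = (await client.post("/api/conversation/", headers=HEADERS)).json()
        reply = await client.post(
            "/api/conversation/query",
            headers=HEADERS,
            json={"query": question, "conversation_id": conv["id"]},
        )
        return reply.json()["response"]


print(asyncio.run(ask_simba("How many teeth has Simba had extracted?")))
```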

## Configuration

### Environment Variables

| Variable | Description | Default |
|----------|-------------|---------|
| `DATABASE_URL` | PostgreSQL connection string | `postgres://...` |
| `CHROMADB_PATH` | ChromaDB storage path | `./chromadb` |
| `OLLAMA_URL` | Ollama server URL | `http://localhost:11434` |
| `OPENAI_API_KEY` | OpenAI API key | - |
| `PAPERLESS_TOKEN` | Paperless-NGX API token | - |
| `BASE_URL` | Paperless-NGX base URL | - |
| `OIDC_ISSUER` | OIDC provider URL | - |
| `OIDC_CLIENT_ID` | OIDC client ID | - |
| `OIDC_CLIENT_SECRET` | OIDC client secret | - |
| `JWT_SECRET_KEY` | JWT signing key | - |
| `TAVILY_KEY` | Tavily web search API key | - |

See `.env.example` for the full list.

## Scripts

### User Management
```bash
# Add a new user
python scripts/add_user.py

# View message statistics
python scripts/user_message_stats.py
```

### Vector Store Management
```bash
# Show vector store statistics
python scripts/manage_vectorstore.py stats

# Index new documents from Paperless
python scripts/manage_vectorstore.py index

# Clear and reindex everything
python scripts/manage_vectorstore.py reindex

# Inspect vector store contents
python scripts/inspect_vector_store.py
```

See [docs/vectorstore.md](docs/vectorstore.md) for details.

## Database Migrations

```bash
# Generate a new migration
aerich migrate --name "describe_your_changes"

# Apply pending migrations
aerich upgrade

# View migration history
aerich history

# Rollback last migration
aerich downgrade
```

See [docs/deployment.md](docs/deployment.md) for detailed migration workflows.

## LangChain Agent

The conversational agent has access to two tools:

1. **simba_search** - Query the vector store for Simba's documents
   - Used for: Medical records, veterinary history, factual information

2. **web_search** - Search the web via Tavily API
   - Used for: Recent events, external knowledge, general questions

The agent automatically selects the appropriate tool based on the user's query.
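
Condensed from `blueprints/conversation/agents.py`, the wiring looks roughly like this (tool bodies trimmed to their essence):

```python
from langchain.agents import create_agent
from langchain.tools import tool
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI

from blueprints.rag.logic import query_vector_store

# Ollama-first model with an OpenAI fallback
model = ChatOllama(model="llama3.1:8b").with_fallbacks([ChatOpenAI(model="gpt-5-mini")])


@tool
async def simba_search(query: str) -> str:
    """Search Simba's medical records and veterinary documents."""
    serialized, _docs = await query_vector_store(query=query)  # ChromaDB similarity search
    return serialized


@tool
async def web_search(query: str) -> str:
    """Search the web for current information using Tavily."""
    ...  # Tavily API call, trimmed here


main_agent = create_agent(model=model, tools=[simba_search, web_search])
```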

## Authentication Flow

```
User → Authelia (OIDC) → Backend (JWT) → Frontend (localStorage)
              ↓
            LLDAP
```

1. User clicks "Login"
2. Frontend redirects to Authelia
3. User authenticates via Authelia (backed by LLDAP)
4. Authelia redirects back with authorization code
5. Backend exchanges code for OIDC tokens
6. Backend issues JWT tokens
7. Frontend stores tokens in localStorage
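
The PKCE piece of steps 4 and 5 lives in `blueprints/users/__init__.py`; its core, condensed:

```python
import base64
import hashlib
import secrets

# The backend keeps code_verifier private and sends only its S256 challenge
code_verifier = secrets.token_urlsafe(64)
code_challenge = (
    base64.urlsafe_b64encode(hashlib.sha256(code_verifier.encode()).digest())
    .decode()
    .rstrip("=")
)
# The authorization request carries code_challenge (method S256); the later
# token exchange proves possession by sending code_verifier in the POST body.
```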

## Contributing

1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Run tests and linting
5. Submit a pull request

## Documentation

- [Development Guide](docs/development.md) - Setup and development workflow
- [Deployment Guide](docs/deployment.md) - Deployment and migrations
- [Vector Store Guide](docs/vectorstore.md) - Managing the vector database
- [Authentication Guide](docs/authentication.md) - OIDC and LDAP setup

## License

[Your License Here]

## Acknowledgments

- Built for Simba, the most important cat in the world 🐱
- Powered by LangChain, ChromaDB, and the open-source community

(modified file: Tortoise ORM / Aerich database configuration)
@@ -1,7 +1,12 @@
 import os
 
+# Database configuration with environment variable support
+# Use DATABASE_PATH for relative paths or DATABASE_URL for full connection strings
+DATABASE_PATH = os.getenv("DATABASE_PATH", "database/raggr.db")
+DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite://{DATABASE_PATH}")
+
 TORTOISE_ORM = {
-    "connections": {"default": os.getenv("DATABASE_URL", "sqlite:///app/raggr.db")},
+    "connections": {"default": DATABASE_URL},
     "apps": {
         "models": {
             "models": [

app.py
@@ -1,16 +1,15 @@
 import os
 
-from quart import Quart, request, jsonify, render_template, send_from_directory
+from quart import Quart, jsonify, render_template, request, send_from_directory
+from quart_jwt_extended import JWTManager, get_jwt_identity, jwt_refresh_token_required
 from tortoise.contrib.quart import register_tortoise
 
-from quart_jwt_extended import JWTManager, jwt_refresh_token_required, get_jwt_identity
-
-from main import consult_simba_oracle
-
-import blueprints.users
 import blueprints.conversation
 import blueprints.conversation.logic
+import blueprints.rag
+import blueprints.users
 import blueprints.users.models
+from main import consult_simba_oracle
 
 app = Quart(
     __name__,

@@ -24,10 +23,16 @@ jwt = JWTManager(app)
 # Register blueprints
 app.register_blueprint(blueprints.users.user_blueprint)
 app.register_blueprint(blueprints.conversation.conversation_blueprint)
+app.register_blueprint(blueprints.rag.rag_blueprint)
+
+
+# Database configuration with environment variable support
+DATABASE_URL = os.getenv(
+    "DATABASE_URL", "postgres://raggr:raggr_dev_password@localhost:5432/raggr"
+)
 
 TORTOISE_CONFIG = {
-    "connections": {"default": "sqlite://raggr.db"},
+    "connections": {"default": DATABASE_URL},
     "apps": {
         "models": {
             "models": [

@@ -69,9 +74,11 @@ async def query():
     user = await blueprints.users.models.User.get(id=current_user_uuid)
     data = await request.get_json()
     query = data.get("query")
-    conversation = await blueprints.conversation.logic.get_conversation_for_user(
-        user=user
-    )
+    conversation_id = data.get("conversation_id")
+    conversation = await blueprints.conversation.logic.get_conversation_by_id(
+        conversation_id
+    )
+    await conversation.fetch_related("messages")
     await blueprints.conversation.logic.add_message_to_conversation(
         conversation=conversation,
         message=query,

@@ -79,7 +86,11 @@ async def query():
         user=user,
     )
 
-    response = consult_simba_oracle(query)
+    transcript = await blueprints.conversation.logic.get_conversation_transcript(
+        user=user, conversation=conversation
+    )
+
+    response = consult_simba_oracle(input=query, transcript=transcript)
     await blueprints.conversation.logic.add_message_to_conversation(
         conversation=conversation,
         message=response,

@@ -113,10 +124,17 @@ async def get_messages():
         }
     )
 
+    name = conversation.name
+    if len(messages) > 8:
+        name = await blueprints.conversation.logic.rename_conversation(
+            user=user,
+            conversation=conversation,
+        )
+
     return jsonify(
         {
             "id": str(conversation.id),
-            "name": conversation.name,
+            "name": name,
             "messages": messages,
             "created_at": conversation.created_at.isoformat(),
             "updated_at": conversation.updated_at.isoformat(),

blueprints/conversation/__init__.py
@@ -1,7 +1,23 @@
-from quart import Blueprint, jsonify
+import datetime
+
+from quart import Blueprint, jsonify, request
+from quart_jwt_extended import (
+    get_jwt_identity,
+    jwt_refresh_token_required,
+)
+
+import blueprints.users.models
+
+from .agents import main_agent
+from .logic import (
+    add_message_to_conversation,
+    get_conversation_by_id,
+    rename_conversation,
+)
 from .models import (
     Conversation,
     PydConversation,
+    PydListConversation,
 )
 
 conversation_blueprint = Blueprint(

@@ -9,9 +25,148 @@ conversation_blueprint = Blueprint(
 )
 
 
+@conversation_blueprint.post("/query")
+@jwt_refresh_token_required
+async def query():
+    current_user_uuid = get_jwt_identity()
+    user = await blueprints.users.models.User.get(id=current_user_uuid)
+    data = await request.get_json()
+    query = data.get("query")
+    conversation_id = data.get("conversation_id")
+    conversation = await get_conversation_by_id(conversation_id)
+    await conversation.fetch_related("messages")
+    await add_message_to_conversation(
+        conversation=conversation,
+        message=query,
+        speaker="user",
+        user=user,
+    )
+
+    # Build conversation history from recent messages (last 10 for context)
+    recent_messages = (
+        conversation.messages[-10:]
+        if len(conversation.messages) > 10
+        else conversation.messages
+    )
+
+    messages_payload = [
+        {
+            "role": "system",
+            "content": """You are a helpful cat assistant named Simba that understands veterinary terms. When there are questions to you specifically, they are referring to Simba the cat. Answer the user as if you were a cat named Simba. Don't act too catlike. Be assertive.
+
+SIMBA FACTS (as of January 2026):
+- Name: Simba
+- Species: Feline (Domestic Short Hair / American Short Hair)
+- Sex: Male, Neutered
+- Date of Birth: August 8, 2016 (approximately 9 years 5 months old)
+- Color: Orange
+- Current Weight: 16 lbs (as of 1/8/2026)
+- Owner: Ryan Chen
+- Location: Long Island City, NY
+- Veterinarian: Court Square Animal Hospital
+
+Medical Conditions:
+- Hypertrophic Cardiomyopathy (HCM): Diagnosed 12/11/2025. Concentric left ventricular hypertrophy with no left atrial dilation. Grade II-III/VI systolic heart murmur. No cardiac medications currently needed. Must avoid Domitor, acepromazine, and ketamine during anesthesia.
+- Dental Issues: Prior extraction of teeth 307 and 407 due to resorption. Tooth 107 extracted on 1/8/2026. Early resorption lesions present on teeth 207, 309, and 409.
+
+Recent Medical Events:
+- 1/8/2026: Dental cleaning and tooth 107 extraction. Prescribed Onsior for 3 days. Oravet sealant applied.
+- 12/11/2025: Echocardiogram confirming HCM diagnosis. Pre-op bloodwork was normal.
+- 12/1/2025: Visited for decreased appetite/nausea. Received subcutaneous fluids and Cerenia.
+
+Diet & Lifestyle:
+- Diet: Hill's I/D wet and dry food
+- Supplements: Plaque Off
+- Indoor only cat, only pet in the household
+
+Upcoming Appointments:
+- Rabies Vaccine: Due 2/19/2026
+- Routine Examination: Due 6/1/2026
+- FVRCP-3yr Vaccine: Due 10/2/2026
+
+IMPORTANT: When users ask factual questions about Simba's health, medical history, veterinary visits, medications, weight, or any information that would be in documents, you MUST use the simba_search tool to retrieve accurate information before answering. Do not rely on general knowledge - always search the documents for factual questions.""",
+        }
+    ]
+
+    # Add recent conversation history
+    for msg in recent_messages[:-1]:  # Exclude the message we just added
+        role = "user" if msg.speaker == "user" else "assistant"
+        messages_payload.append({"role": role, "content": msg.text})
+
+    # Add current query
+    messages_payload.append({"role": "user", "content": query})
+
+    payload = {"messages": messages_payload}
+
+    response = await main_agent.ainvoke(payload)
+    message = response.get("messages", [])[-1].content
+    await add_message_to_conversation(
+        conversation=conversation,
+        message=message,
+        speaker="simba",
+        user=user,
+    )
+    return jsonify({"response": message})
+
+
 @conversation_blueprint.route("/<conversation_id>")
+@jwt_refresh_token_required
 async def get_conversation(conversation_id: str):
     conversation = await Conversation.get(id=conversation_id)
-    serialized_conversation = await PydConversation.from_tortoise_orm(conversation)
+    current_user_uuid = get_jwt_identity()
+    user = await blueprints.users.models.User.get(id=current_user_uuid)
+    await conversation.fetch_related("messages")
 
-    return jsonify(serialized_conversation.model_dump_json())
+    # Manually serialize the conversation with messages
+    messages = []
+    for msg in conversation.messages:
+        messages.append(
+            {
+                "id": str(msg.id),
+                "text": msg.text,
+                "speaker": msg.speaker.value,
+                "created_at": msg.created_at.isoformat(),
+            }
+        )
+    name = conversation.name
+    if len(messages) > 8 and "datetime" in name.lower():
+        name = await rename_conversation(
+            user=user,
+            conversation=conversation,
+        )
+    print(name)
+
+    return jsonify(
+        {
+            "id": str(conversation.id),
+            "name": name,
+            "messages": messages,
+            "created_at": conversation.created_at.isoformat(),
+            "updated_at": conversation.updated_at.isoformat(),
+        }
+    )
+
+
+@conversation_blueprint.post("/")
+@jwt_refresh_token_required
+async def create_conversation():
+    user_uuid = get_jwt_identity()
+    user = await blueprints.users.models.User.get(id=user_uuid)
+    conversation = await Conversation.create(
+        name=f"{user.username} {datetime.datetime.now().timestamp}",
+        user=user,
+    )
+
+    serialized_conversation = await PydConversation.from_tortoise_orm(conversation)
+    return jsonify(serialized_conversation.model_dump())
+
+
+@conversation_blueprint.get("/")
+@jwt_refresh_token_required
+async def get_all_conversations():
+    user_uuid = get_jwt_identity()
+    user = await blueprints.users.models.User.get(id=user_uuid)
+    conversations = Conversation.filter(user=user)
+    serialized_conversations = await PydListConversation.from_queryset(conversations)
+
+    return jsonify(serialized_conversations.model_dump())

blueprints/conversation/agents.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import os
from typing import cast

from langchain.agents import create_agent
from langchain.chat_models import BaseChatModel
from langchain.tools import tool
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from tavily import AsyncTavilyClient

from blueprints.rag.logic import query_vector_store

openai_gpt_5_mini = ChatOpenAI(model="gpt-5-mini")
ollama_deepseek = ChatOllama(model="llama3.1:8b", base_url=os.getenv("OLLAMA_URL"))
model_with_fallback = cast(
    BaseChatModel, ollama_deepseek.with_fallbacks([openai_gpt_5_mini])
)
client = AsyncTavilyClient(os.getenv("TAVILY_KEY"), "")


@tool
async def web_search(query: str) -> str:
    """Search the web for current information using Tavily.

    Use this tool when you need to:
    - Find current information not in the knowledge base
    - Look up recent events, news, or updates
    - Verify facts or get additional context
    - Search for information outside of Simba's documents

    Args:
        query: The search query to look up on the web

    Returns:
        Search results from the web with titles, content, and source URLs
    """
    response = await client.search(query=query, search_depth="basic")
    results = response.get("results", [])

    if not results:
        return "No results found for the query."

    formatted = "\n\n".join(
        [
            f"**{result['title']}**\n{result['content']}\nSource: {result['url']}"
            for result in results[:5]
        ]
    )
    return formatted


@tool(response_format="content_and_artifact")
async def simba_search(query: str):
    """Search through Simba's medical records, veterinary documents, and personal information.

    Use this tool whenever the user asks questions about:
    - Simba's health history, medical records, or veterinary visits
    - Medications, treatments, or diagnoses
    - Weight, diet, or physical characteristics over time
    - Veterinary recommendations or advice
    - Ryan's (the owner's) information related to Simba
    - Any factual information that would be found in documents

    Args:
        query: The user's question or information need about Simba

    Returns:
        Relevant information from Simba's documents
    """
    print(f"[SIMBA SEARCH] Tool called with query: {query}")
    serialized, docs = await query_vector_store(query=query)
    print(f"[SIMBA SEARCH] Found {len(docs)} documents")
    print(f"[SIMBA SEARCH] Serialized result length: {len(serialized)}")
    print(f"[SIMBA SEARCH] First 200 chars: {serialized[:200]}")
    return serialized, docs


main_agent = create_agent(model=model_with_fallback, tools=[simba_search, web_search])

blueprints/conversation/logic.py
@@ -1,9 +1,10 @@
 import tortoise.exceptions
+from langchain_openai import ChatOpenAI
-from .models import Conversation, ConversationMessage
 
 import blueprints.users.models
 
+from .models import Conversation, ConversationMessage, RenameConversationOutputSchema
+
 
 async def create_conversation(name: str = "") -> Conversation:
     conversation = await Conversation.create(name=name)

@@ -44,3 +45,36 @@ async def get_conversation_for_user(user: blueprints.users.models.User) -> Conversation:
     await Conversation.get_or_create(name=f"{user.username}'s chat", user=user)
 
     return await Conversation.get(user=user)
+
+
+async def get_conversation_by_id(id: str) -> Conversation:
+    return await Conversation.get(id=id)
+
+
+async def get_conversation_transcript(
+    user: blueprints.users.models.User, conversation: Conversation
+) -> str:
+    messages = []
+    for message in conversation.messages:
+        messages.append(f"{message.speaker} at {message.created_at}: {message.text}")
+
+    return "\n".join(messages)
+
+
+async def rename_conversation(
+    user: blueprints.users.models.User,
+    conversation: Conversation,
+) -> str:
+    messages: str = await get_conversation_transcript(
+        user=user, conversation=conversation
+    )
+
+    llm = ChatOpenAI(model="gpt-4o-mini")
+    structured_llm = llm.with_structured_output(RenameConversationOutputSchema)
+
+    prompt = f"Summarize the following conversation into a sassy one-liner title:\n\n{messages}"
+    response = structured_llm.invoke(prompt)
+    new_name: str = response.get("title", "")
+    conversation.name = new_name
+    await conversation.save()
+    return new_name

blueprints/conversation/models.py
@@ -1,11 +1,18 @@
 import enum
+from dataclasses import dataclass
 
-from tortoise.models import Model
 from tortoise import fields
 from tortoise.contrib.pydantic import (
-    pydantic_queryset_creator,
     pydantic_model_creator,
+    pydantic_queryset_creator,
 )
+from tortoise.models import Model
+
+
+@dataclass
+class RenameConversationOutputSchema:
+    title: str
+    justification: str
 
 
 class Speaker(enum.Enum):

@@ -40,5 +47,15 @@ class ConversationMessage(Model):
 
 
 PydConversationMessage = pydantic_model_creator(ConversationMessage)
-PydConversation = pydantic_model_creator(Conversation, name="Conversation")
+PydConversation = pydantic_model_creator(
+    Conversation, name="Conversation", allow_cycles=True, exclude=("user",)
+)
+PydConversationWithMessages = pydantic_model_creator(
+    Conversation,
+    name="ConversationWithMessages",
+    allow_cycles=True,
+    exclude=("user",),
+    include=("messages",),
+)
+PydListConversation = pydantic_queryset_creator(Conversation)
 PydListConversationMessage = pydantic_queryset_creator(ConversationMessage)

blueprints/rag/__init__.py (new file, 47 lines)
@@ -0,0 +1,47 @@
from quart import Blueprint, jsonify
from quart_jwt_extended import jwt_refresh_token_required

from .logic import get_vector_store_stats, index_documents, vector_store
from blueprints.users.decorators import admin_required

rag_blueprint = Blueprint("rag_api", __name__, url_prefix="/api/rag")


@rag_blueprint.get("/stats")
@jwt_refresh_token_required
async def get_stats():
    """Get vector store statistics."""
    stats = get_vector_store_stats()
    return jsonify(stats)


@rag_blueprint.post("/index")
@admin_required
async def trigger_index():
    """Trigger indexing of documents from Paperless-NGX. Admin only."""
    try:
        await index_documents()
        stats = get_vector_store_stats()
        return jsonify({"status": "success", "stats": stats})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500


@rag_blueprint.post("/reindex")
@admin_required
async def trigger_reindex():
    """Clear and reindex all documents. Admin only."""
    try:
        # Clear existing documents
        collection = vector_store._collection
        all_docs = collection.get()

        if all_docs["ids"]:
            collection.delete(ids=all_docs["ids"])

        # Reindex
        await index_documents()
        stats = get_vector_store_stats()
        return jsonify({"status": "success", "stats": stats})
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500

blueprints/rag/fetchers.py (new file, 75 lines)
@@ -0,0 +1,75 @@
import os
import tempfile

import httpx


class PaperlessNGXService:
    def __init__(self):
        self.base_url = os.getenv("BASE_URL")
        self.token = os.getenv("PAPERLESS_TOKEN")
        self.url = f"http://{os.getenv('BASE_URL')}/api/documents/?tags__id=8"
        self.headers = {"Authorization": f"Token {os.getenv('PAPERLESS_TOKEN')}"}

    def get_data(self):
        print(f"Getting data from: {self.url}")
        r = httpx.get(self.url, headers=self.headers)
        results = r.json()["results"]

        nextLink = r.json().get("next")

        while nextLink:
            r = httpx.get(nextLink, headers=self.headers)
            results += r.json()["results"]
            nextLink = r.json().get("next")

        return results

    def get_doc_by_id(self, doc_id: int):
        url = f"http://{os.getenv('BASE_URL')}/api/documents/{doc_id}/"
        r = httpx.get(url, headers=self.headers)
        return r.json()

    def download_pdf_from_id(self, id: int) -> str:
        download_url = f"http://{os.getenv('BASE_URL')}/api/documents/{id}/download/"
        response = httpx.get(
            download_url, headers=self.headers, follow_redirects=True, timeout=30
        )
        response.raise_for_status()
        # Use a temporary file for the downloaded PDF
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
        temp_file.write(response.content)
        temp_file.close()
        temp_pdf_path = temp_file.name
        pdf_to_process = temp_pdf_path
        return pdf_to_process

    def upload_cleaned_content(self, document_id, data):
        PUTS_URL = f"http://{os.getenv('BASE_URL')}/api/documents/{document_id}/"
        r = httpx.put(PUTS_URL, headers=self.headers, data=data)
        r.raise_for_status()

    def upload_description(self, description_filepath, file, title, exif_date: str):
        POST_URL = f"http://{os.getenv('BASE_URL')}/api/documents/post_document/"
        files = {"document": ("description_filepath", file, "application/txt")}
        data = {
            "title": title,
            "create": exif_date,
            "document_type": 3,
            "tags": [7],
        }

        r = httpx.post(POST_URL, headers=self.headers, data=data, files=files)
        r.raise_for_status()

    def get_tags(self):
        GET_URL = f"http://{os.getenv('BASE_URL')}/api/tags/"
        r = httpx.get(GET_URL, headers=self.headers)
        data = r.json()
        return {tag["id"]: tag["name"] for tag in data["results"]}

    def get_doctypes(self):
        GET_URL = f"http://{os.getenv('BASE_URL')}/api/document_types/"
        r = httpx.get(GET_URL, headers=self.headers)
        data = r.json()
        return {doctype["id"]: doctype["name"] for doctype in data["results"]}

blueprints/rag/logic.py (new file, 101 lines)
@@ -0,0 +1,101 @@
import datetime
import os

from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from .fetchers import PaperlessNGXService

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

vector_store = Chroma(
    collection_name="simba_docs",
    embedding_function=embeddings,
    persist_directory=os.getenv("CHROMADB_PATH", ""),
)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)


def date_to_epoch(date_str: str) -> float:
    split_date = date_str.split("-")
    date = datetime.datetime(
        int(split_date[0]),
        int(split_date[1]),
        int(split_date[2]),
        0,
        0,
        0,
    )

    return date.timestamp()


async def fetch_documents_from_paperless_ngx() -> list[Document]:
    ppngx = PaperlessNGXService()
    data = ppngx.get_data()
    doctypes = ppngx.get_doctypes()
    documents = []
    for doc in data:
        metadata = {
            "created_date": date_to_epoch(doc["created_date"]),
            "filename": doc["original_file_name"],
            "document_type": doctypes.get(doc["document_type"], ""),
        }
        documents.append(Document(page_content=doc["content"], metadata=metadata))

    return documents


async def index_documents():
    documents = await fetch_documents_from_paperless_ngx()

    splits = text_splitter.split_documents(documents)
    await vector_store.aadd_documents(documents=splits)


async def query_vector_store(query: str):
    retrieved_docs = await vector_store.asimilarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs


def get_vector_store_stats():
    """Get statistics about the vector store."""
    collection = vector_store._collection
    count = collection.count()
    return {
        "total_documents": count,
        "collection_name": collection.name,
    }


def list_all_documents(limit: int = 10):
    """List documents in the vector store with their metadata."""
    collection = vector_store._collection
    results = collection.get(limit=limit, include=["metadatas", "documents"])

    documents = []
    for i, doc_id in enumerate(results["ids"]):
        documents.append(
            {
                "id": doc_id,
                "metadata": results["metadatas"][i]
                if results.get("metadatas")
                else None,
                "content_preview": results["documents"][i][:200]
                if results.get("documents")
                else None,
            }
        )

    return documents

blueprints/rag/models.py (new empty file, 0 lines)
@@ -6,13 +6,169 @@ from quart_jwt_extended import (
     get_jwt_identity,
 )
 from .models import User
+from .oidc_service import OIDCUserService
+from config.oidc_config import oidc_config
+import secrets
+import httpx
+from urllib.parse import urlencode
+import hashlib
+import base64


 user_blueprint = Blueprint("user_api", __name__, url_prefix="/api/user")

+# In-memory storage for OIDC state/PKCE (production: use Redis or a database)
+# Format: {state: {"pkce_verifier": str, "redirect_after_login": str}}
+_oidc_sessions = {}
+
+
+@user_blueprint.route("/oidc/login", methods=["GET"])
+async def oidc_login():
+    """
+    Initiate the OIDC login flow.
+
+    Generates PKCE parameters and redirects to Authelia.
+    """
+    if not oidc_config.validate_config():
+        return jsonify({"error": "OIDC not configured"}), 500
+
+    try:
+        # Generate PKCE parameters
+        code_verifier = secrets.token_urlsafe(64)
+
+        # For PKCE, code_challenge = BASE64URL(SHA256(code_verifier))
+        code_challenge = (
+            base64.urlsafe_b64encode(hashlib.sha256(code_verifier.encode()).digest())
+            .decode()
+            .rstrip("=")
+        )
+
+        # Generate state for CSRF protection
+        state = secrets.token_urlsafe(32)
+
+        # Store the PKCE verifier and state for callback validation
+        _oidc_sessions[state] = {
+            "pkce_verifier": code_verifier,
+            "redirect_after_login": request.args.get("redirect", "/"),
+        }
+
+        # Get the authorization endpoint from discovery
+        discovery = await oidc_config.get_discovery_document()
+        auth_endpoint = discovery.get("authorization_endpoint")
+
+        # Build the authorization URL
+        params = {
+            "client_id": oidc_config.client_id,
+            "response_type": "code",
+            "redirect_uri": oidc_config.redirect_uri,
+            "scope": "openid email profile groups",
+            "state": state,
+            "code_challenge": code_challenge,
+            "code_challenge_method": "S256",
+        }
+
+        auth_url = f"{auth_endpoint}?{urlencode(params)}"
+
+        return jsonify({"auth_url": auth_url})
+    except Exception as e:
+        return jsonify({"error": f"OIDC login failed: {str(e)}"}), 500
+
+
+@user_blueprint.route("/oidc/callback", methods=["GET"])
+async def oidc_callback():
+    """
+    Handle the OIDC callback from Authelia.
+
+    Exchanges the authorization code for tokens, verifies the ID token,
+    and creates or updates the user.
+    """
+    # Get the authorization code and state from the callback
+    code = request.args.get("code")
+    state = request.args.get("state")
+    error = request.args.get("error")
+
+    if error:
+        return jsonify({"error": f"OIDC error: {error}"}), 400
+
+    if not code or not state:
+        return jsonify({"error": "Missing code or state"}), 400
+
+    # Validate the state and retrieve the PKCE verifier
+    session = _oidc_sessions.pop(state, None)
+    if not session:
+        return jsonify({"error": "Invalid or expired state"}), 400
+
+    pkce_verifier = session["pkce_verifier"]
+
+    # Exchange the authorization code for tokens
+    discovery = await oidc_config.get_discovery_document()
+    token_endpoint = discovery.get("token_endpoint")
+
+    token_data = {
+        "grant_type": "authorization_code",
+        "code": code,
+        "redirect_uri": oidc_config.redirect_uri,
+        "client_id": oidc_config.client_id,
+        "client_secret": oidc_config.client_secret,
+        "code_verifier": pkce_verifier,
+    }
+
+    # Use the client_secret_post method (credentials in the POST body)
+    async with httpx.AsyncClient() as client:
+        token_response = await client.post(token_endpoint, data=token_data)
+
+    if token_response.status_code != 200:
+        return jsonify(
+            {"error": f"Failed to exchange code for token: {token_response.text}"}
+        ), 400
+
+    tokens = token_response.json()
+
+    id_token = tokens.get("id_token")
+    if not id_token:
+        return jsonify({"error": "No ID token received"}), 400
+
+    # Verify the ID token
+    try:
+        claims = await oidc_config.verify_id_token(id_token)
+    except Exception as e:
+        return jsonify({"error": f"ID token verification failed: {str(e)}"}), 400
+
+    # Get or create the user from OIDC claims
+    user = await OIDCUserService.get_or_create_user_from_oidc(claims)
+
+    # Issue backend JWT tokens
+    access_token = create_access_token(identity=str(user.id))
+    refresh_token = create_refresh_token(identity=str(user.id))
+
+    # Return tokens to the frontend, which stores them and redirects
+    return jsonify(
+        access_token=access_token,
+        refresh_token=refresh_token,
+        user={
+            "id": str(user.id),
+            "username": user.username,
+            "email": user.email,
+            "groups": user.ldap_groups,
+            "is_admin": user.is_admin(),
+        },
+    )
+
+
+@user_blueprint.route("/refresh", methods=["POST"])
+@jwt_refresh_token_required
+async def refresh():
+    """Refresh the access token (unchanged from the original)."""
+    user_id = get_jwt_identity()
+    new_token = create_access_token(identity=user_id)
+    return jsonify(access_token=new_token)
+
+
+# Legacy username/password login - kept for backward compatibility during migration
 @user_blueprint.route("/login", methods=["POST"])
 async def login():
+    """
+    Legacy username/password login.
+
+    This can be removed after the OIDC migration is complete.
+    """
     data = await request.get_json()
     username = data.get("username")
     password = data.get("password")
@@ -28,13 +184,5 @@ async def login():
     return jsonify(
         access_token=access_token,
         refresh_token=refresh_token,
-        user={"id": user.id, "username": user.username},
+        user={"id": str(user.id), "username": user.username},
     )
-
-
-@user_blueprint.route("/refresh", methods=["POST"])
-@jwt_refresh_token_required
-async def refresh():
-    user_id = get_jwt_identity()
-    new_token = create_access_token(identity=user_id)
-    return jsonify(access_token=new_token)
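The in-memory `_oidc_sessions` dict above is the piece flagged for production hardening in the security notes further down. A minimal sketch of a Redis-backed replacement, assuming redis-py's asyncio client (connection details, key prefix, and TTL are illustrative, not taken from this repo):

```python
# Hypothetical Redis-backed replacement for the in-memory _oidc_sessions dict.
# Assumes redis-py (redis.asyncio); key naming and the 10-minute TTL are
# illustrative choices.
import json
import redis.asyncio as redis

_redis = redis.Redis(host="localhost", port=6379, decode_responses=True)


async def store_oidc_session(state: str, pkce_verifier: str, redirect: str) -> None:
    # Expire abandoned logins automatically instead of leaking state forever
    await _redis.set(
        f"oidc:state:{state}",
        json.dumps({"pkce_verifier": pkce_verifier, "redirect_after_login": redirect}),
        ex=600,
    )


async def pop_oidc_session(state: str) -> dict | None:
    # One-time use, mirroring _oidc_sessions.pop(state, None)
    key = f"oidc:state:{state}"
    raw = await _redis.get(key)
    if raw is None:
        return None
    await _redis.delete(key)
    return json.loads(raw)
```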
26
blueprints/users/decorators.py
Normal file
@@ -0,0 +1,26 @@
"""
Authentication decorators for role-based access control.
"""

from functools import wraps
from quart import jsonify
from quart_jwt_extended import jwt_refresh_token_required, get_jwt_identity
from .models import User


def admin_required(fn):
    """
    Decorator that requires the user to be an admin (member of lldap_admin group).
    Must be used on async route handlers.
    """

    @wraps(fn)
    @jwt_refresh_token_required
    async def wrapper(*args, **kwargs):
        user_id = get_jwt_identity()
        user = await User.get_or_none(id=user_id)
        if not user or not user.is_admin():
            return jsonify({"error": "Admin access required"}), 403
        return await fn(*args, **kwargs)

    return wrapper
@@ -8,14 +8,32 @@ import bcrypt
 class User(Model):
     id = fields.UUIDField(primary_key=True)
     username = fields.CharField(max_length=255)
-    password = fields.BinaryField()  # Hashed
+    password = fields.BinaryField(null=True)  # Hashed - nullable for OIDC users
     email = fields.CharField(max_length=100, unique=True)
+
+    # OIDC fields
+    oidc_subject = fields.CharField(
+        max_length=255, unique=True, null=True, index=True
+    )  # "sub" claim from OIDC
+    auth_provider = fields.CharField(
+        max_length=50, default="local"
+    )  # "local" or "oidc"
+    ldap_groups = fields.JSONField(default=[])  # LDAP groups from OIDC claims
+
     created_at = fields.DatetimeField(auto_now_add=True)
     updated_at = fields.DatetimeField(auto_now=True)

     class Meta:
         table = "users"

+    def has_group(self, group: str) -> bool:
+        """Check if user belongs to a specific LDAP group."""
+        return group in (self.ldap_groups or [])
+
+    def is_admin(self) -> bool:
+        """Check if user is an admin (member of lldap_admin group)."""
+        return self.has_group("lldap_admin")
+
     def set_password(self, plain_password: str):
         self.password = bcrypt.hashpw(
             plain_password.encode("utf-8"),
@@ -23,4 +41,6 @@ class User(Model):
         )

     def verify_password(self, plain_password: str):
+        if not self.password:
+            return False
         return bcrypt.checkpw(plain_password.encode("utf-8"), self.password)
81
blueprints/users/oidc_service.py
Normal file
@@ -0,0 +1,81 @@
"""
OIDC User Management Service
"""

from typing import Dict, Any, Optional
from uuid import uuid4
from .models import User


class OIDCUserService:
    """Service for managing OIDC user authentication and provisioning"""

    @staticmethod
    async def get_or_create_user_from_oidc(claims: Dict[str, Any]) -> User:
        """
        Get an existing user by OIDC subject, or create a new user from OIDC claims.

        Args:
            claims: Decoded OIDC ID token claims

        Returns:
            User object (existing or newly created)
        """
        oidc_subject = claims.get("sub")
        if not oidc_subject:
            raise ValueError("No 'sub' claim in ID token")

        # Try to find an existing user by OIDC subject
        user = await User.filter(oidc_subject=oidc_subject).first()

        if user:
            # Update user info from the latest claims (optional)
            user.email = claims.get("email", user.email)
            user.username = (
                claims.get("preferred_username") or claims.get("name") or user.username
            )
            # Update LDAP groups from claims
            user.ldap_groups = claims.get("groups", [])
            await user.save()
            return user

        # Check if the user exists by email (migration case)
        email = claims.get("email")
        if email:
            user = await User.filter(email=email, auth_provider="local").first()
            if user:
                # Migrate an existing local user to OIDC
                user.oidc_subject = oidc_subject
                user.auth_provider = "oidc"
                user.password = None  # Clear password
                user.ldap_groups = claims.get("groups", [])
                await user.save()
                return user

        # Create a new user from OIDC claims
        username = (
            claims.get("preferred_username")
            or claims.get("name")
            or claims.get("email", "").split("@")[0]
            or f"user_{oidc_subject[:8]}"
        )

        # Extract LDAP groups from claims
        groups = claims.get("groups", [])

        user = await User.create(
            id=uuid4(),
            username=username,
            email=email or f"{oidc_subject}@oidc.local",  # Fallback if no email claim
            oidc_subject=oidc_subject,
            auth_provider="oidc",
            password=None,
            ldap_groups=groups,
        )

        return user

    @staticmethod
    async def find_user_by_oidc_subject(oidc_subject: str) -> Optional[User]:
        """Find a user by OIDC subject ID"""
        return await User.filter(oidc_subject=oidc_subject).first()
0
config/__init__.py
Normal file
114
config/oidc_config.py
Normal file
@@ -0,0 +1,114 @@
"""
OIDC Configuration for Authelia Integration
"""

import os
from typing import Dict, Any
from authlib.jose import jwt
from authlib.jose.errors import JoseError
import httpx


class OIDCConfig:
    """OIDC Configuration Manager"""

    def __init__(self):
        # Load from environment variables
        self.issuer = os.getenv("OIDC_ISSUER")  # e.g., https://auth.example.com
        self.client_id = os.getenv("OIDC_CLIENT_ID")
        self.client_secret = os.getenv("OIDC_CLIENT_SECRET")
        self.redirect_uri = os.getenv(
            "OIDC_REDIRECT_URI", "http://localhost:8080/api/user/oidc/callback"
        )

        # OIDC endpoints (can use discovery or manual config)
        self.use_discovery = os.getenv("OIDC_USE_DISCOVERY", "true").lower() == "true"

        # Manual endpoint configuration (fallback if discovery fails)
        self.authorization_endpoint = os.getenv("OIDC_AUTHORIZATION_ENDPOINT")
        self.token_endpoint = os.getenv("OIDC_TOKEN_ENDPOINT")
        self.userinfo_endpoint = os.getenv("OIDC_USERINFO_ENDPOINT")
        self.jwks_uri = os.getenv("OIDC_JWKS_URI")

        # Cached discovery document and JWKS
        self._discovery_doc: Dict[str, Any] | None = None
        self._jwks: Dict[str, Any] | None = None

    def validate_config(self) -> bool:
        """Validate that the required configuration is present"""
        if not self.issuer or not self.client_id or not self.client_secret:
            return False
        return True

    async def get_discovery_document(self) -> Dict[str, Any]:
        """Fetch the OIDC discovery document from the .well-known endpoint"""
        if self._discovery_doc:
            return self._discovery_doc

        if not self.use_discovery:
            # Return the manual configuration
            return {
                "issuer": self.issuer,
                "authorization_endpoint": self.authorization_endpoint,
                "token_endpoint": self.token_endpoint,
                "userinfo_endpoint": self.userinfo_endpoint,
                "jwks_uri": self.jwks_uri,
            }

        discovery_url = f"{self.issuer.rstrip('/')}/.well-known/openid-configuration"

        async with httpx.AsyncClient() as client:
            response = await client.get(discovery_url)
            response.raise_for_status()
            self._discovery_doc = response.json()
            return self._discovery_doc

    async def get_jwks(self) -> Dict[str, Any]:
        """Fetch the JSON Web Key Set for token verification"""
        if self._jwks:
            return self._jwks

        discovery = await self.get_discovery_document()
        jwks_uri = discovery.get("jwks_uri")

        if not jwks_uri:
            raise ValueError("No jwks_uri found in discovery document")

        async with httpx.AsyncClient() as client:
            response = await client.get(jwks_uri)
            response.raise_for_status()
            self._jwks = response.json()
            return self._jwks

    async def verify_id_token(self, id_token: str) -> Dict[str, Any]:
        """
        Verify and decode an ID token from the OIDC provider.

        Returns the decoded claims if valid; raises an exception if invalid.
        """
        jwks = await self.get_jwks()

        try:
            # Verify token signature and claims
            claims = jwt.decode(
                id_token,
                jwks,
                claims_options={
                    "iss": {"essential": True, "value": self.issuer},
                    "aud": {"essential": True, "value": self.client_id},
                    "exp": {"essential": True},
                },
            )

            # Additional validation
            claims.validate()

            return claims

        except JoseError as e:
            raise ValueError(f"Invalid ID token: {str(e)}")


# Global instance
oidc_config = OIDCConfig()
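A small usage sketch of the global instance defined above (every name here comes from this file; it just prints the discovered authorization endpoint):

```python
# Illustrative use of oidc_config from an async context
import asyncio

from config.oidc_config import oidc_config


async def main() -> None:
    if not oidc_config.validate_config():
        raise SystemExit("OIDC_ISSUER / OIDC_CLIENT_ID / OIDC_CLIENT_SECRET not set")
    discovery = await oidc_config.get_discovery_document()
    print(discovery.get("authorization_endpoint"))


asyncio.run(main())
```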
71
docker-compose.dev.yml
Normal file
@@ -0,0 +1,71 @@
services:
  postgres:
    image: postgres:16-alpine
    environment:
      - POSTGRES_USER=raggr
      - POSTGRES_PASSWORD=raggr_dev_password
      - POSTGRES_DB=raggr
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U raggr"]
      interval: 5s
      timeout: 5s
      retries: 5

  # raggr service disabled - run locally for development
  # raggr:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile.dev
  #   image: torrtle/simbarag:dev
  #   ports:
  #     - "8080:8080"
  #   env_file:
  #     - .env
  #   environment:
  #     - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
  #     - BASE_URL=${BASE_URL}
  #     - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434}
  #     - CHROMADB_PATH=/app/data/chromadb
  #     - OPENAI_API_KEY=${OPENAI_API_KEY}
  #     - JWT_SECRET_KEY=${JWT_SECRET_KEY}
  #     - OIDC_ISSUER=${OIDC_ISSUER}
  #     - OIDC_CLIENT_ID=${OIDC_CLIENT_ID}
  #     - OIDC_CLIENT_SECRET=${OIDC_CLIENT_SECRET}
  #     - OIDC_REDIRECT_URI=${OIDC_REDIRECT_URI}
  #     - OIDC_USE_DISCOVERY=${OIDC_USE_DISCOVERY:-true}
  #     - DATABASE_URL=postgres://raggr:raggr_dev_password@postgres:5432/raggr
  #     - FLASK_ENV=development
  #     - PYTHONUNBUFFERED=1
  #     - NODE_ENV=development
  #     - TAVILY_KEY=${TAVILY_KEY}
  #   depends_on:
  #     postgres:
  #       condition: service_healthy
  #   volumes:
  #     - chromadb_data:/app/data/chromadb
  #     - ./migrations:/app/migrations  # Bind mount for migrations (bidirectional)
  #   develop:
  #     watch:
  #       # Sync+restart on any file change in the root directory
  #       - action: sync+restart
  #         path: .
  #         target: /app
  #         ignore:
  #           - __pycache__/
  #           - "*.pyc"
  #           - "*.pyo"
  #           - "*.pyd"
  #           - .git/
  #           - chromadb/
  #           - node_modules/
  #           - raggr-frontend/dist/
  #           - docs/
  #           - .venv/

volumes:
  chromadb_data:
  postgres_data:
@@ -1,17 +1,51 @@
 version: "3.8"

 services:
+  postgres:
+    image: postgres:16-alpine
+    ports:
+      - "5432:5432"
+    environment:
+      - POSTGRES_USER=${POSTGRES_USER:-raggr}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-changeme}
+      - POSTGRES_DB=${POSTGRES_DB:-raggr}
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-raggr}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    restart: unless-stopped
+
   raggr:
+    build:
+      context: .
+      dockerfile: Dockerfile
     image: torrtle/simbarag:latest
-    network_mode: host
+    ports:
+      - "8080:8080"
     environment:
       - PAPERLESS_TOKEN=${PAPERLESS_TOKEN}
       - BASE_URL=${BASE_URL}
       - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434}
-      - CHROMADB_PATH=/app/chromadb
+      - CHROMADB_PATH=/app/data/chromadb
       - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - JWT_SECRET_KEY=${JWT_SECRET_KEY}
+      - OIDC_ISSUER=${OIDC_ISSUER}
+      - OIDC_CLIENT_ID=${OIDC_CLIENT_ID}
+      - OIDC_CLIENT_SECRET=${OIDC_CLIENT_SECRET}
+      - OIDC_REDIRECT_URI=${OIDC_REDIRECT_URI}
+      - OIDC_USE_DISCOVERY=${OIDC_USE_DISCOVERY:-true}
+      - DATABASE_URL=${DATABASE_URL:-postgres://raggr:changeme@postgres:5432/raggr}
+      - TAVILY_KEY=${TAVILY_KEY}
+    depends_on:
+      postgres:
+        condition: service_healthy
     volumes:
-      - chromadb_data:/app/chromadb
+      - chromadb_data:/app/data/chromadb
+    restart: unless-stopped

 volumes:
   chromadb_data:
+  postgres_data:
53
docs/TASKS.md
Normal file
@@ -0,0 +1,53 @@
# Tasks & Feature Requests

## Feature Requests

### YNAB Integration (Admin-Only)

- **Description**: Integration with the YNAB (You Need A Budget) API to enable financial data queries and insights
- **Requirements**:
  - Admin-guarded endpoints (require the `lldap_admin` group)
  - YNAB API token configuration in environment variables
  - Sync budget data, transactions, and categories
  - Store YNAB data for RAG queries
- **Endpoints**:
  - `POST /api/admin/ynab/sync` - Trigger a YNAB data sync
  - `GET /api/admin/ynab/status` - Check sync status and last update
  - `GET /api/admin/ynab/budgets` - List available budgets
- **Implementation Notes** (see the sketch after this list):
  - Use YNAB API v1 (https://api.youneedabudget.com/v1)
  - Respect rate limiting (200 requests per hour)
  - Store transaction data in PostgreSQL with appropriate indexing
  - Index transaction descriptions and categories in ChromaDB for RAG queries
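A rough starting point for the client module: only the base URL and bearer-token auth come from the YNAB API v1 docs; the `YNAB_TOKEN` variable name and the helper itself are assumptions, not existing code.

```python
# Hypothetical YNAB client sketch for the integration described above.
import os

import httpx

YNAB_BASE = "https://api.youneedabudget.com/v1"


async def list_budgets() -> list[dict]:
    """Fetch the budgets visible to the configured YNAB token."""
    headers = {"Authorization": f"Bearer {os.environ['YNAB_TOKEN']}"}
    async with httpx.AsyncClient(base_url=YNAB_BASE, headers=headers) as client:
        resp = await client.get("/budgets")
        resp.raise_for_status()
        return resp.json()["data"]["budgets"]
```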

### Money Insights

- **Description**: AI-powered financial insights and analysis based on YNAB data
- **Features**:
  - Spending pattern analysis
  - Budget vs. actual comparisons
  - Category-based spending trends
  - Anomaly detection (unusual transactions)
  - Natural language queries like "How much did I spend on groceries last month?"
  - Month-over-month and year-over-year comparisons
- **Implementation Notes**:
  - Leverage the existing LangChain agent architecture
  - Add custom tools for financial calculations
  - Use the LLM to generate insights and summaries
  - Create visualizations or data exports for frontend display

## Backlog

- [ ] YNAB API client module
- [ ] YNAB data models (Budget, Transaction, Category, Account)
- [ ] Database schema for financial data
- [ ] YNAB sync background job/scheduler
- [ ] Financial insights LangChain tools
- [ ] Admin UI for YNAB configuration
- [ ] Frontend components for money insights display

## Technical Debt

_To be added_

## Bugs

_To be added_
97
docs/VECTORSTORE.md
Normal file
@@ -0,0 +1,97 @@
# Vector Store Management

This document describes how to manage the ChromaDB vector store used for RAG (Retrieval-Augmented Generation).

## Configuration

The vector store location is controlled by the `CHROMADB_PATH` environment variable:

- **Development (local)**: Set in `.env` to a local path (e.g., `/path/to/chromadb`)
- **Docker**: Automatically set to `/app/data/chromadb` and persisted via a Docker volume

## Management Commands

### CLI (Command Line)

Use the `scripts/manage_vectorstore.py` script for vector store operations:

```bash
# Show statistics
python scripts/manage_vectorstore.py stats

# Index documents from Paperless-NGX (incremental)
python scripts/manage_vectorstore.py index

# Clear and reindex all documents
python scripts/manage_vectorstore.py reindex

# List documents
python scripts/manage_vectorstore.py list 10
python scripts/manage_vectorstore.py list 20 --show-content
```

### Docker

Run commands inside the Docker container:

```bash
# Show statistics
docker compose exec raggr python scripts/manage_vectorstore.py stats

# Reindex all documents
docker compose exec raggr python scripts/manage_vectorstore.py reindex
```

### API Endpoints

The following authenticated endpoints are available:

- `GET /api/rag/stats` - Get vector store statistics
- `POST /api/rag/index` - Trigger indexing of new documents
- `POST /api/rag/reindex` - Clear and reindex all documents

## How It Works

1. **Document Fetching**: Documents are fetched from Paperless-NGX via the API
2. **Chunking**: Documents are split into chunks of ~1000 characters with 200 characters of overlap
3. **Embedding**: Chunks are embedded using OpenAI's `text-embedding-3-large` model
4. **Storage**: Embeddings are stored in ChromaDB with metadata (filename, document type, date)
5. **Retrieval**: User queries are embedded and similar chunks are retrieved for RAG

Steps 2-4 are sketched below.
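A condensed sketch of the chunk/embed/store steps, assuming the LangChain splitter, embedding, and Chroma classes this repo appears to use (the collection name and function signature here are illustrative, not the script's actual code):

```python
# Illustrative indexing pipeline; assumes langchain-openai and langchain-chroma
# are installed and CHROMADB_PATH / OPENAI_API_KEY are set.
import os

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
store = Chroma(
    collection_name="documents",  # illustrative name
    embedding_function=OpenAIEmbeddings(model="text-embedding-3-large"),
    persist_directory=os.environ["CHROMADB_PATH"],
)


def index_document(text: str, filename: str, doc_type: str, date: str) -> None:
    """Chunk one document and store its embeddings with metadata."""
    chunks = splitter.split_text(text)
    store.add_texts(
        chunks,
        metadatas=[
            {"filename": filename, "document_type": doc_type, "date": date}
            for _ in chunks
        ],
    )
```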

## Troubleshooting

### "Error creating hnsw segment reader"

This indicates a corrupted index. Solution:

```bash
python scripts/manage_vectorstore.py reindex
```

### Empty results

Check whether documents are indexed:

```bash
python scripts/manage_vectorstore.py stats
```

If the count is 0, run:

```bash
python scripts/manage_vectorstore.py index
```

### Different results in Docker vs. local

Docker and local environments use separate ChromaDB instances. To sync:

1. Index inside Docker: `docker compose exec raggr python scripts/manage_vectorstore.py reindex`
2. Or mount the same volume for both environments

## Production Considerations

1. **Volume Persistence**: Use Docker volumes or persistent storage for ChromaDB
2. **Backup**: Regularly back up the ChromaDB data directory
3. **Reindexing**: Schedule periodic reindexing to keep data fresh
4. **Monitoring**: Monitor the `/api/rag/stats` endpoint for document counts
274
docs/authentication.md
Normal file
@@ -0,0 +1,274 @@
# Authentication Architecture

This document describes the authentication stack for SimbaRAG: LLDAP → Authelia → OAuth2/OIDC.

## Overview

```
┌─────────┐     ┌──────────┐     ┌──────────────┐     ┌──────────┐
│  LLDAP  │────▶│ Authelia │────▶│ OAuth2/OIDC  │────▶│ SimbaRAG │
│ (Users) │     │  (IdP)   │     │   (Flow)     │     │  (App)   │
└─────────┘     └──────────┘     └──────────────┘     └──────────┘
```

| Component | Role |
|-----------|------|
| **LLDAP** | Lightweight LDAP server storing users and groups |
| **Authelia** | Identity provider that authenticates against LLDAP and issues OIDC tokens |
| **SimbaRAG** | Relying party that consumes OIDC tokens and manages sessions |

## OIDC Configuration

### Environment Variables

| Variable | Description | Default |
|----------|-------------|---------|
| `OIDC_ISSUER` | Authelia server URL | Required |
| `OIDC_CLIENT_ID` | Client ID registered in Authelia | Required |
| `OIDC_CLIENT_SECRET` | Client secret for token exchange | Required |
| `OIDC_REDIRECT_URI` | Callback URL after authentication | Required |
| `OIDC_USE_DISCOVERY` | Enable automatic discovery | `true` |
| `JWT_SECRET_KEY` | Secret for signing backend JWTs | Required |

### Discovery

When `OIDC_USE_DISCOVERY=true`, the application fetches endpoints from:

```
{OIDC_ISSUER}/.well-known/openid-configuration
```

This provides:

- Authorization endpoint
- Token endpoint
- JWKS URI for signature verification
- Supported scopes and claims
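To sanity-check the configuration, the discovery document can be fetched directly (using the example issuer from this doc; piping through `jq` is optional pretty-printing):

```bash
# Inspect what the provider advertises
curl -s https://auth.example.com/.well-known/openid-configuration | jq .
```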

## Authentication Flow

### 1. Login Initiation

```
GET /api/user/oidc/login
```

1. Generate a PKCE code verifier and challenge (S256)
2. Generate a CSRF state token
3. Store the state in session storage
4. Return the authorization URL for the frontend redirect

### 2. Authorization

The user is redirected to Authelia, where they:

1. Enter LDAP credentials
2. Complete MFA if configured
3. Consent to the requested scopes

### 3. Callback

```
GET /api/user/oidc/callback?code=...&state=...
```

1. Validate that the state matches the stored value (CSRF protection)
2. Exchange the authorization code for tokens using the PKCE verifier
3. Verify the ID token signature using the JWKS
4. Validate claims (issuer, audience, expiration)
5. Create or update the user in the database
6. Issue backend JWT tokens (access + refresh)

### 4. Token Refresh

```
POST /api/user/refresh
Authorization: Bearer <refresh_token>
```

Issues a new access token without re-authentication.
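For example, with the default local port from the development guide:

```bash
# Exchange a refresh token for a new access token
curl -X POST http://localhost:8080/api/user/refresh \
  -H "Authorization: Bearer $REFRESH_TOKEN"
```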

## User Model

```python
class User(Model):
    id = UUIDField(primary_key=True)
    username = CharField(max_length=255)
    password = BinaryField(null=True)  # Nullable for OIDC-only users
    email = CharField(max_length=100, unique=True)

    # OIDC fields
    oidc_subject = CharField(max_length=255, unique=True, null=True)
    auth_provider = CharField(max_length=50, default="local")  # "local" or "oidc"
    ldap_groups = JSONField(default=[])  # LDAP groups from OIDC claims

    created_at = DatetimeField(auto_now_add=True)
    updated_at = DatetimeField(auto_now=True)

    def has_group(self, group: str) -> bool:
        """Check if user belongs to a specific LDAP group."""
        return group in (self.ldap_groups or [])

    def is_admin(self) -> bool:
        """Check if user is an admin (member of lldap_admin group)."""
        return self.has_group("lldap_admin")
```

### User Provisioning

The `OIDCUserService` handles automatic user creation:

1. Extract claims from the ID token (`sub`, `email`, `preferred_username`)
2. Check whether the user exists by `oidc_subject`
3. If not, check by email for migration from local auth
4. Create a new user or update the existing one

## JWT Tokens

The backend issues its own JWTs after OIDC authentication:

| Token Type | Purpose | Typical Lifetime |
|------------|---------|------------------|
| Access Token | API authorization | 15 minutes |
| Refresh Token | Obtain new access tokens | 7 days |

### Claims

```json
{
  "identity": "<user-uuid>",
  "type": "access|refresh",
  "exp": 1234567890,
  "iat": 1234567890
}
```

## Protected Endpoints

All API endpoints use the `@jwt_refresh_token_required` decorator for basic authentication:

```python
@blueprint.route("/example")
@jwt_refresh_token_required
async def protected_endpoint():
    user_id = get_jwt_identity()
    # ...
```

---

## Role-Based Access Control (RBAC)

RBAC is implemented using LDAP groups passed through Authelia as OIDC claims. Users in the `lldap_admin` group have admin privileges.

### Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                            LLDAP                            │
│               Groups: lldap_admin, lldap_user               │
└─────────────────────────────────────────────────────────────┘
                               │
                               ▼
┌─────────────────────────────────────────────────────────────┐
│                          Authelia                           │
│       Scope: groups → Claim: groups = ["lldap_admin"]       │
└─────────────────────────────────────────────────────────────┘
                               │
                               ▼
┌─────────────────────────────────────────────────────────────┐
│                          SimbaRAG                           │
│  1. Extract groups from ID token                            │
│  2. Store in User.ldap_groups                               │
│  3. Check membership with @admin_required decorator         │
└─────────────────────────────────────────────────────────────┘
```

### Authelia Configuration

Ensure Authelia is configured to pass the `groups` claim:

```yaml
identity_providers:
  oidc:
    clients:
      - client_id: simbarag
        scopes:
          - openid
          - profile
          - email
          - groups # Required for RBAC
```

### Admin-Only Endpoints

The `@admin_required` decorator protects privileged endpoints:

```python
from blueprints.users.decorators import admin_required

@blueprint.post("/admin-action")
@admin_required
async def admin_only_endpoint():
    # Only users in the lldap_admin group can access
    ...
```

**Protected endpoints:**

| Endpoint | Access | Description |
|----------|--------|-------------|
| `POST /api/rag/index` | Admin | Trigger document indexing |
| `POST /api/rag/reindex` | Admin | Clear and reindex all documents |
| `GET /api/rag/stats` | All users | View vector store statistics |

### User Response

The OIDC callback returns group information:

```json
{
  "access_token": "...",
  "refresh_token": "...",
  "user": {
    "id": "uuid",
    "username": "john",
    "email": "john@example.com",
    "groups": ["lldap_admin", "lldap_user"],
    "is_admin": true
  }
}
```

---

## Security Considerations

### Current Gaps

| Issue | Risk | Mitigation |
|-------|------|------------|
| In-memory session storage | State lost on restart, not scalable | Use Redis for production |
| No token revocation | Tokens valid until expiry | Implement a blacklist or short expiry |
| No audit logging | Cannot track auth events | Add event logging |
| Single JWT secret | Compromise affects all tokens | Rotate secrets, use asymmetric keys |

### Recommendations

1. **Use Redis** for OIDC state storage in production
2. **Implement logout** with token blacklisting
3. **Add audit logging** for authentication events
4. **Rotate JWT secrets** regularly
5. **Use short-lived access tokens** (15 min) with refresh

---

## File Reference

| File | Purpose |
|------|---------|
| `services/raggr/oidc_config.py` | OIDC client configuration and discovery |
| `services/raggr/blueprints/users/models.py` | User model definition with group helpers |
| `services/raggr/blueprints/users/oidc_service.py` | User provisioning from OIDC claims |
| `services/raggr/blueprints/users/__init__.py` | Auth endpoints and flow |
| `services/raggr/blueprints/users/decorators.py` | Auth decorators (`@admin_required`) |
188
docs/deployment.md
Normal file
@@ -0,0 +1,188 @@
# Deployment & Migrations Guide

This document covers database migrations and deployment workflows for SimbaRAG.

## Migration Workflow

Migrations are managed by [Aerich](https://github.com/tortoise/aerich), the migration tool for Tortoise ORM.

### Key Principles

1. **Generate migrations in Docker** - Aerich needs database access to detect schema changes
2. **Migrations auto-apply on startup** - Both `startup.sh` and `startup-dev.sh` run `aerich upgrade`
3. **Commit migrations to git** - Migration files must be in the repo for production deploys
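Aerich reads its settings from the `[tool.aerich]` table in `pyproject.toml` (see the file reference at the end of this guide). A typical entry looks roughly like the following; the dotted `tortoise_orm` path is a guess based on this repo's `aerich_config.py`, not copied from it:

```toml
# Illustrative [tool.aerich] section; verify against the actual pyproject.toml
[tool.aerich]
tortoise_orm = "aerich_config.TORTOISE_ORM"
location = "./migrations"
src_folder = "./."
```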

### Generating a New Migration

#### Development (Recommended)

With `docker-compose.dev.yml`, your local `services/raggr` directory is synced to the container, so migrations generated inside the container appear on your host automatically.

```bash
# 1. Start the dev environment
docker compose -f docker-compose.dev.yml up -d

# 2. Generate the migration (runs inside the container, syncs to the host)
docker compose -f docker-compose.dev.yml exec raggr aerich migrate --name describe_your_change

# 3. Verify the migration was created
ls services/raggr/migrations/models/

# 4. Commit the migration
git add services/raggr/migrations/
git commit -m "Add migration: describe_your_change"
```

#### Production Container

For production, migration files are baked into the image, so you must generate migrations in dev first.

```bash
# If you need to generate a migration from production (not recommended):
docker compose exec raggr aerich migrate --name describe_your_change

# Copy the file out of the container
docker cp $(docker compose ps -q raggr):/app/migrations/models/ ./services/raggr/migrations/
```

### Applying Migrations

Migrations apply automatically on container start via the startup scripts.

**Manual application (if needed):**

```bash
# Dev
docker compose -f docker-compose.dev.yml exec raggr aerich upgrade

# Production
docker compose exec raggr aerich upgrade
```

### Checking Migration Status

```bash
# View applied migrations
docker compose exec raggr aerich history

# View pending migrations
docker compose exec raggr aerich heads
```

### Rolling Back

```bash
# Downgrade one migration
docker compose exec raggr aerich downgrade

# Downgrade to a specific version
docker compose exec raggr aerich downgrade -v 1
```

## Deployment Workflows

### Development

```bash
# Start with watch mode (auto-restarts on file changes)
docker compose -f docker-compose.dev.yml up

# Or with docker compose watch (requires Docker Compose v2.22+)
docker compose -f docker-compose.dev.yml watch
```

The dev environment:

- Syncs `services/raggr/` to `/app` in the container
- Rebuilds the frontend on changes
- Auto-applies migrations on startup

### Production

```bash
# Build and deploy
docker compose build raggr
docker compose up -d

# View logs
docker compose logs -f raggr

# Verify migrations applied
docker compose exec raggr aerich history
```

### Fresh Deploy (New Database)

On the first deploy with an empty database, `startup-dev.sh` runs `aerich init-db` instead of `aerich upgrade`. This creates all tables from the current models.

For production (`startup.sh`), ensure the database exists and run:

```bash
# If the aerich table doesn't exist yet
docker compose exec raggr aerich init-db

# Or if migrating from an existing schema
docker compose exec raggr aerich upgrade
```

## Troubleshooting

### "No migrations found" on startup

The `migrations/models/` directory is empty or was not copied into the image.

**Fix:** Ensure migrations are committed and the Dockerfile copies them:

```dockerfile
COPY migrations ./migrations
```

### Migration fails with "relation already exists"

The database has tables but aerich doesn't know about them (fresh aerich setup on an existing DB).

**Fix:** Fake the initial migration:

```bash
# Mark the initial migration as applied without running it
docker compose exec raggr aerich upgrade --fake
```

### Model changes not detected

Aerich compares models against the last migration's state. If the state is out of sync:

```bash
# Regenerate migration state (dangerous - review carefully)
docker compose exec raggr aerich migrate --name fix_state
```

### Database connection errors

Ensure PostgreSQL is healthy before running migrations:

```bash
# Check postgres status
docker compose ps postgres

# Wait for postgres, then run migrations
docker compose exec raggr bash -c "sleep 5 && aerich upgrade"
```

## File Reference

| File | Purpose |
|------|---------|
| `pyproject.toml` | Aerich config (`[tool.aerich]` section) |
| `migrations/models/` | Migration files |
| `startup.sh` | Production startup (runs `aerich upgrade`) |
| `startup-dev.sh` | Dev startup (runs `aerich upgrade` or `init-db`) |
| `app.py` | Contains `TORTOISE_CONFIG` |
| `aerich_config.py` | Aerich initialization configuration |

## Quick Reference

| Task | Command |
|------|---------|
| Generate migration | `docker compose -f docker-compose.dev.yml exec raggr aerich migrate --name name` |
| Apply migrations | `docker compose exec raggr aerich upgrade` |
| View history | `docker compose exec raggr aerich history` |
| Rollback | `docker compose exec raggr aerich downgrade` |
| Fresh init | `docker compose exec raggr aerich init-db` |
258
docs/development.md
Normal file
@@ -0,0 +1,258 @@
# Development Guide

This guide explains how to run SimbaRAG in development mode.

## Quick Start

### Option 1: Local Development (Recommended)

Run PostgreSQL in Docker and the application locally for faster iteration:

```bash
# 1. Start PostgreSQL
docker compose -f docker-compose.dev.yml up -d

# 2. Set environment variables
export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr"
export CHROMADB_PATH="./chromadb"
export $(grep -v '^#' .env | xargs) # Load other vars from .env

# 3. Install dependencies (first time)
pip install -r requirements.txt
cd raggr-frontend && yarn install && yarn build && cd ..

# 4. Run migrations
aerich upgrade

# 5. Start the server
python app.py
```

The application will be available at `http://localhost:8080`.

### Option 2: Full Docker Development

Run everything in Docker with hot reload (slower, but matches production):

```bash
# Uncomment the raggr service in docker-compose.dev.yml first!

# Start all services
docker compose -f docker-compose.dev.yml up --build

# View logs
docker compose -f docker-compose.dev.yml logs -f raggr
```

## Project Structure

```
raggr/
├── app.py                  # Quart application entry point
├── main.py                 # RAG logic and LangChain agent
├── llm.py                  # LLM client (Ollama + OpenAI fallback)
├── aerich_config.py        # Database migration configuration
│
├── blueprints/             # API route blueprints
│   ├── users/              # Authentication (OIDC, JWT, RBAC)
│   ├── conversation/       # Chat conversations and messages
│   └── rag/                # Document indexing (admin only)
│
├── config/                 # Configuration modules
│   └── oidc_config.py      # OIDC authentication settings
│
├── utils/                  # Reusable utilities
│   ├── chunker.py          # Document chunking for embeddings
│   ├── cleaner.py          # PDF cleaning and summarization
│   ├── image_process.py    # Image description with LLM
│   └── request.py          # Paperless-NGX API client
│
├── scripts/                # Administrative scripts
│   ├── add_user.py             # Create users manually
│   ├── user_message_stats.py   # User message statistics
│   ├── manage_vectorstore.py   # Vector store management
│   ├── inspect_vector_store.py # Inspect ChromaDB contents
│   └── query.py                # Query generation utilities
│
├── raggr-frontend/         # React frontend
│   └── src/                # Frontend source code
│
├── migrations/             # Database migrations
└── docs/                   # Documentation
```

## Making Changes

### Backend Changes

**Local development:**

1. Edit Python files
2. Save
3. Restart `python app.py` (or use a tool like `watchdog` for auto-reload; see the sketch below)
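One possible auto-reload setup uses watchdog's bundled `watchmedo` CLI (assumes `pip install watchdog` has been run; the pattern choice is just a suggestion):

```bash
# Restart the server whenever a .py file changes
watchmedo auto-restart --patterns="*.py" --recursive -- python app.py
```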

**Docker development:**

1. Edit Python files
2. Files are synced via Docker watch mode
3. The container automatically restarts

### Frontend Changes

```bash
cd raggr-frontend

# Development mode with hot reload
yarn dev

# Production build (for testing)
yarn build
```

The backend serves built files from `raggr-frontend/dist/`.

### Database Model Changes

When you modify Tortoise ORM models:

```bash
# Generate migration
aerich migrate --name "describe_your_change"

# Apply migration
aerich upgrade

# View history
aerich history
```

See [deployment.md](deployment.md) for detailed migration workflows.

### Adding Dependencies

**Backend:**

```bash
# Add to requirements.txt or use uv
pip install package-name
pip freeze > requirements.txt
```

**Frontend:**

```bash
cd raggr-frontend
yarn add package-name
```

## Useful Commands

### Database

```bash
# Connect to PostgreSQL
docker compose -f docker-compose.dev.yml exec postgres psql -U raggr -d raggr

# Reset the database
docker compose -f docker-compose.dev.yml down -v
docker compose -f docker-compose.dev.yml up -d
aerich init-db
```

### Vector Store

```bash
# Show statistics
python scripts/manage_vectorstore.py stats

# Index new documents from Paperless
python scripts/manage_vectorstore.py index

# Clear and reindex everything
python scripts/manage_vectorstore.py reindex
```

See [vectorstore.md](vectorstore.md) for details.

### Scripts

```bash
# Add a new user
python scripts/add_user.py

# View message statistics
python scripts/user_message_stats.py

# Inspect vector store contents
python scripts/inspect_vector_store.py
```

## Environment Variables

Copy `.env.example` to `.env` and configure:

| Variable | Description | Example |
|----------|-------------|---------|
| `DATABASE_URL` | PostgreSQL connection | `postgres://user:pass@localhost:5432/db` |
| `CHROMADB_PATH` | ChromaDB storage path | `./chromadb` |
| `OLLAMA_URL` | Ollama server URL | `http://localhost:11434` |
| `OPENAI_API_KEY` | OpenAI API key (fallback LLM) | `sk-...` |
| `PAPERLESS_TOKEN` | Paperless-NGX API token | `...` |
| `BASE_URL` | Paperless-NGX URL | `https://paperless.example.com` |
| `OIDC_ISSUER` | OIDC provider URL | `https://auth.example.com` |
| `OIDC_CLIENT_ID` | OIDC client ID | `simbarag` |
| `OIDC_CLIENT_SECRET` | OIDC client secret | `...` |
| `JWT_SECRET_KEY` | JWT signing key | `random-secret` |
| `TAVILY_KEY` | Tavily web search API key | `tvly-...` |

## Troubleshooting

### Port Already in Use

```bash
# Find and kill the process on port 8080
lsof -ti:8080 | xargs kill -9

# Or change the port in app.py
```

### Database Connection Errors

```bash
# Check if PostgreSQL is running
docker compose -f docker-compose.dev.yml ps postgres

# View PostgreSQL logs
docker compose -f docker-compose.dev.yml logs postgres
```

### Frontend Not Building

```bash
cd raggr-frontend
rm -rf node_modules dist
yarn install
yarn build
```

### ChromaDB Errors

```bash
# Clear and recreate ChromaDB
rm -rf chromadb/
python scripts/manage_vectorstore.py reindex
```

### Import Errors After Reorganization

Ensure you're in the project root directory when running scripts, or use:

```bash
# Add the project root to the Python path
export PYTHONPATH="${PYTHONPATH}:$(pwd)"
python scripts/your_script.py
```

## Hot Tips

- Use `python -m pdb app.py` for debugging
- Enable Quart debug mode in `app.py`: `app.run(debug=True)`
- Check API logs: they appear in the terminal running `python app.py`
- Frontend logs: open the browser DevTools console
- Use `docker compose -f docker-compose.dev.yml down -v` for a clean slate
203
docs/index.md
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
# SimbaRAG Documentation
|
||||||
|
|
||||||
|
Welcome to the SimbaRAG documentation! This guide will help you understand, develop, and deploy the SimbaRAG conversational AI system.
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
New to SimbaRAG? Start here:
|
||||||
|
|
||||||
|
1. Read the main [README](../README.md) for project overview and architecture
|
||||||
|
2. Follow the [Development Guide](development.md) to set up your environment
|
||||||
|
3. Learn about [Authentication](authentication.md) setup with OIDC and LDAP
|
||||||
|
|
||||||
|
## Documentation Structure
|
||||||
|
|
||||||
|
### Core Guides
|
||||||
|
|
||||||
|
- **[Development Guide](development.md)** - Local development setup, project structure, and workflows
|
||||||
|
- **[Deployment Guide](deployment.md)** - Database migrations, deployment workflows, and troubleshooting
|
||||||
|
- **[Vector Store Guide](VECTORSTORE.md)** - Managing ChromaDB, indexing documents, and RAG operations
|
||||||
|
- **[Migrations Guide](MIGRATIONS.md)** - Database migration reference
|
||||||
|
- **[Authentication Guide](authentication.md)** - OIDC, Authelia, LLDAP configuration and user management
|
||||||
|
|
||||||
|
### Quick Reference
|
||||||
|
|
||||||
|
| Task | Documentation |
|
||||||
|
|------|---------------|
|
||||||
|
| Set up local dev environment | [Development Guide → Quick Start](development.md#quick-start) |
|
||||||
|
| Run database migrations | [Deployment Guide → Migration Workflow](deployment.md#migration-workflow) |
|
||||||
|
| Index documents | [Vector Store Guide → Management Commands](VECTORSTORE.md#management-commands) |
|
||||||
|
| Configure authentication | [Authentication Guide](authentication.md) |
|
||||||
|
| Run administrative scripts | [Development Guide → Scripts](development.md#scripts) |
|
||||||
|
|
||||||
|
## Common Tasks
|
||||||
|
|
||||||
|
### Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start local development
|
||||||
|
docker compose -f docker-compose.dev.yml up -d
|
||||||
|
export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr"
|
||||||
|
export CHROMADB_PATH="./chromadb"
|
||||||
|
python app.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Database Migrations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate migration
|
||||||
|
aerich migrate --name "your_change"
|
||||||
|
|
||||||
|
# Apply migrations
|
||||||
|
aerich upgrade
|
||||||
|
|
||||||
|
# View history
|
||||||
|
aerich history
|
||||||
|
```
|
||||||
|
|
||||||
|
### Vector Store Management
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Show statistics
|
||||||
|
python scripts/manage_vectorstore.py stats
|
||||||
|
|
||||||
|
# Index new documents
|
||||||
|
python scripts/manage_vectorstore.py index
|
||||||
|
|
||||||
|
# Reindex everything
|
||||||
|
python scripts/manage_vectorstore.py reindex
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
SimbaRAG is built with:
|
||||||
|
|
||||||
|
- **Backend**: Quart (async Python), LangChain, Tortoise ORM
|
||||||
|
- **Frontend**: React 19, Rsbuild, Tailwind CSS
|
||||||
|
- **Database**: PostgreSQL (users, conversations)
|
||||||
|
- **Vector Store**: ChromaDB (document embeddings)
|
||||||
|
- **LLM**: Ollama (primary), OpenAI (fallback)
|
||||||
|
- **Auth**: Authelia (OIDC), LLDAP (user directory)
|
||||||
|
|
||||||
|
See the [README](../README.md#system-architecture) for detailed architecture diagram.
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
simbarag/
|
||||||
|
├── app.py # Quart app entry point
|
||||||
|
├── main.py # RAG & LangChain agent
|
||||||
|
├── llm.py # LLM client
|
||||||
|
├── blueprints/ # API routes
|
||||||
|
├── config/ # Configuration
|
||||||
|
├── utils/ # Utilities
|
||||||
|
├── scripts/ # Admin scripts
|
||||||
|
├── raggr-frontend/ # React UI
|
||||||
|
├── migrations/ # Database migrations
|
||||||
|
├── docs/ # This documentation
|
||||||
|
├── docker-compose.yml # Production Docker setup
|
||||||
|
└── docker-compose.dev.yml # Development Docker setup
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key Concepts
|
||||||
|
|
||||||
|
### RAG (Retrieval-Augmented Generation)
|
||||||
|
|
||||||
|
SimbaRAG uses RAG to answer questions about Simba:
|
||||||
|
|
||||||
|
1. Documents are fetched from Paperless-NGX
|
||||||
|
2. Documents are chunked and embedded using OpenAI
|
||||||
|
3. Embeddings are stored in ChromaDB
|
||||||
|
4. User queries are embedded and matched against the store
|
||||||
|
5. Relevant chunks are passed to the LLM for context
|
||||||
|
6. LLM generates an answer using retrieved context
|
||||||
|
|
||||||
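To make the retrieval half of the pipeline concrete, here is a minimal sketch. It assumes a persistent ChromaDB collection named `simba_docs` and reuses the `LLMClient` wrapper from `llm.py`; the prompt wording is illustrative rather than the exact prompt in `main.py`.

```python
import chromadb

from llm import LLMClient

# Assumes documents were already chunked and embedded into this collection.
client = chromadb.PersistentClient(path="./chromadb")
collection = client.get_or_create_collection("simba_docs")


def answer(question: str) -> str:
    # Step 4: embed the query and retrieve the closest chunks.
    results = collection.query(query_texts=[question], n_results=5)
    context = "\n".join(results["documents"][0])

    # Steps 5-6: hand the retrieved chunks to the LLM as context.
    llm = LLMClient()
    return llm.chat(
        prompt=f"Using this data: {context}. Respond to this prompt: {question}",
        system_prompt="You are a helpful assistant that understands veterinary terms.",
    )
```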

### LangChain Agent

The conversational agent has two tools:

- **simba_search**: Queries the vector store for Simba's documents
- **web_search**: Searches the web via the Tavily API

The agent automatically selects tools based on the query; a sketch of how tools like these can be declared follows.
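As a rough illustration (not the exact definitions in `main.py`), the two tools can be declared with LangChain's `@tool` decorator, which turns a docstringed function into something the agent can select. The `retrieve_chunks` helper and the API-key handling here are assumptions for the sketch.

```python
import os

from langchain_core.tools import tool
from tavily import TavilyClient

tavily = TavilyClient(api_key=os.getenv("TAVILY_KEY", ""))


@tool
def simba_search(query: str) -> str:
    """Search Simba's veterinary documents in the vector store."""
    return retrieve_chunks(query)  # hypothetical helper around the ChromaDB collection


@tool
def web_search(query: str) -> str:
    """Search the web for general, non-Simba information."""
    return str(tavily.search(query))
```

The agent chooses between the tools based on their docstrings, so keeping those descriptions precise matters as much as the function bodies.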

### Authentication Flow

1. User initiates OIDC login via Authelia
2. Authelia authenticates against LLDAP
3. Backend receives OIDC tokens and issues a JWT
4. Frontend stores the JWT in localStorage
5. Subsequent requests use the JWT for authorization

A sketch of the token-issuing step (step 3) is shown after this list.
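For step 3, the backend does roughly the following once the OIDC callback has been validated. This is a hedged sketch using `quart-jwt-extended` (a project dependency); `upsert_user_from_claims` is a hypothetical helper name, not necessarily the real one.

```python
from quart_jwt_extended import create_access_token, create_refresh_token


async def finish_oidc_login(claims: dict) -> dict:
    # Map the OIDC subject to a local user row (hypothetical helper).
    user = await upsert_user_from_claims(claims)

    # Issue the application's own JWTs; the frontend stores these.
    return {
        "access_token": create_access_token(identity=str(user.id)),
        "refresh_token": create_refresh_token(identity=str(user.id)),
    }
```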

## Environment Variables

Key environment variables (see `.env.example` for the complete list):

| Variable | Purpose |
|----------|---------|
| `DATABASE_URL` | PostgreSQL connection |
| `CHROMADB_PATH` | Vector store location |
| `OLLAMA_URL` | Local LLM server |
| `OPENAI_API_KEY` | OpenAI for embeddings/fallback |
| `PAPERLESS_TOKEN` | Document source API |
| `OIDC_*` | Authentication configuration |
| `TAVILY_KEY` | Web search API |
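The backend loads these with `python-dotenv`, along these lines (the defaults shown are illustrative):

```python
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env into the process environment

DATABASE_URL = os.getenv("DATABASE_URL", "postgres://raggr:raggr@localhost:5432/raggr")
CHROMADB_PATH = os.getenv("CHROMADB_PATH", "./chromadb")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
```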

## API Endpoints

### Authentication

- `GET /api/user/oidc/login` - Start OIDC flow
- `GET /api/user/oidc/callback` - OIDC callback
- `POST /api/user/refresh` - Refresh JWT

### Conversations

- `POST /api/conversation/` - Create conversation
- `GET /api/conversation/` - List conversations
- `POST /api/conversation/query` - Send a chat message

### RAG (Admin Only)

- `GET /api/rag/stats` - Vector store stats
- `POST /api/rag/index` - Index documents
- `POST /api/rag/reindex` - Reindex all

An example call to the chat endpoint is shown below.
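For example, the chat endpoint can be exercised from Python with `httpx` (a project dependency). The JWT, conversation id, and port are placeholders; adjust them for your deployment.

```python
import httpx

token = "..."  # JWT obtained from the OIDC login flow
resp = httpx.post(
    "http://localhost:8080/api/conversation/query",
    headers={"Authorization": f"Bearer {token}"},
    json={"query": "how much does simba weigh", "conversation_id": "..."},
)
resp.raise_for_status()
print(resp.json()["response"])
```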

## Troubleshooting

### Common Issues

| Issue | Solution |
|-------|----------|
| Port already in use | Check if services are running: `lsof -ti:8080` |
| Database connection error | Ensure PostgreSQL is running: `docker compose ps` |
| ChromaDB errors | Clear and reindex: `python scripts/manage_vectorstore.py reindex` |
| Import errors | Check that you're in the `services/raggr/` directory |
| Frontend not building | `cd raggr-frontend && yarn install && yarn build` |

See the individual guides for detailed troubleshooting.

## Contributing

1. Read the [Development Guide](development.md)
2. Set up your local environment
3. Make changes and test locally
4. Generate migrations if needed
5. Submit a pull request

## Additional Resources

- [LangChain Documentation](https://python.langchain.com/)
- [ChromaDB Documentation](https://docs.trychroma.com/)
- [Quart Documentation](https://quart.palletsprojects.com/)
- [Tortoise ORM Documentation](https://tortoise.github.io/)
- [Authelia Documentation](https://www.authelia.com/)

## Need Help?

- Check the relevant guide in this documentation
- Review the troubleshooting sections
- Check application logs: `docker compose logs -f`
- Inspect the database: `docker compose exec postgres psql -U raggr`

---

**Documentation Version**: 1.0
**Last Updated**: January 2026
index.html (new file, 81 lines)
@@ -0,0 +1,81 @@

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="author" content="Paperless-ngx project and contributors">
<meta name="robots" content="noindex,nofollow">

<title>Paperless-ngx sign in</title>
<link href="/static/bootstrap.min.css" rel="stylesheet">
<link href="/static/base.css" rel="stylesheet">
</head>

<body class="text-center">
<div class="position-absolute top-50 start-50 translate-middle">
<form class="form-accounts" id="form-account" method="post">
<input type="hidden" name="csrfmiddlewaretoken" value="KLQ3mMraTFHfK9sMmc6DJcNIS6YixeHnSJiT3A12LYB49HeEXOpx5RnY9V6uPSrD">

<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 2897.4 896.6" width='300' class='logo mb-4'>
<path class="leaf" d="M140,713.7c-3.4-16.4-10.3-49.1-11.2-49.1c-145.7-87.1-128.4-238-80.2-324.2C59,449,251.2,524,139.1,656.8 c-0.9,1.7,5.2,22.4,10.3,41.4c22.4-37.9,56-83.6,54.3-87.9C65.9,273.9,496.9,248.1,586.6,39.4c40.5,201.8-20.7,513.9-367.2,593.2 c-1.7,0.9-62.9,108.6-65.5,109.5c0-1.7-25.9-0.9-22.4-9.5C133.1,727.4,136.6,720.6,140,713.7L140,713.7z M135.7,632.6 c44-50.9-7.8-137.9-38.8-166.4C149.5,556.7,146,609.3,135.7,632.6L135.7,632.6z" transform="translate(0)" style="fill:#17541f"/>
<g class="text" style="fill:#000">
<path d="M1022.3,428.7c-17.8-19.9-42.7-29.8-74.7-29.8c-22.3,0-42.4,5.7-60.5,17.3c-18.1,11.6-32.3,27.5-42.5,47.8 s-15.3,42.9-15.3,67.8c0,24.9,5.1,47.5,15.3,67.8c10.3,20.3,24.4,36.2,42.5,47.8c18.1,11.5,38.3,17.3,60.5,17.3 c32,0,56.9-9.9,74.7-29.8v20.4v0.2h84.5V408.3h-84.5V428.7z M1010.5,575c-10.2,11.7-23.6,17.6-40.2,17.6s-29.9-5.9-40-17.6 s-15.1-26.1-15.1-43.3c0-17.1,5-31.6,15.1-43.3s23.4-17.6,40-17.6c16.6,0,30,5.9,40.2,17.6s15.3,26.1,15.3,43.3 S1020.7,563.3,1010.5,575z" transform="translate(0)"/>
<path d="M1381,416.1c-18.1-11.5-38.3-17.3-60.5-17.4c-32,0-56.9,9.9-74.7,29.8v-20.4h-84.5v390.7h84.5v-164 c17.8,19.9,42.7,29.8,74.7,29.8c22.3,0,42.4-5.7,60.5-17.3s32.3-27.5,42.5-47.8c10.2-20.3,15.3-42.9,15.3-67.8s-5.1-47.5-15.3-67.8 C1413.2,443.6,1399.1,427.7,1381,416.1z M1337.9,575c-10.1,11.7-23.4,17.6-40,17.6s-29.9-5.9-40-17.6s-15.1-26.1-15.1-43.3 c0-17.1,5-31.6,15.1-43.3s23.4-17.6,40-17.6s29.9,5.9,40,17.6s15.1,26.1,15.1,43.3S1347.9,563.3,1337.9,575z" transform="translate(0)"/>
<path d="M1672.2,416.8c-20.5-12-43-18-67.6-18c-24.9,0-47.6,5.9-68,17.6c-20.4,11.7-36.5,27.7-48.2,48s-17.6,42.7-17.6,67.3 c0.3,25.2,6.2,47.8,17.8,68c11.5,20.2,28,36,49.3,47.6c21.3,11.5,45.9,17.3,73.8,17.3c48.6,0,86.8-14.7,114.7-44l-52.5-48.9 c-8.6,8.3-17.6,14.6-26.7,19c-9.3,4.3-21.1,6.4-35.3,6.4c-11.6,0-22.5-3.6-32.7-10.9c-10.3-7.3-17.1-16.5-20.7-27.8h180l0.4-11.6 c0-29.6-6-55.7-18-78.2S1692.6,428.8,1672.2,416.8z M1558.3,503.2c2.1-12.1,7.5-21.8,16.2-29.1s18.7-10.9,30-10.9 s21.2,3.6,29.8,10.9c8.6,7.2,13.9,16.9,16,29.1H1558.3z" transform="translate(0)"/>
<path d="M1895.3,411.7c-11,5.6-20.3,13.7-28,24.4h-0.1v-28h-84.5v247.3h84.5V536.3c0-22.6,4.7-38.1,14.2-46.5 c9.5-8.5,22.7-12.7,39.6-12.7c6.2,0,13.5,1,21.8,3.1l10.7-72c-5.9-3.3-14.5-4.9-25.8-4.9C1917.1,403.3,1906.3,406.1,1895.3,411.7z" transform="translate(0)"/>
<rect x="1985" y="277.4" width="84.5" height="377.8" transform="translate(0)"/>
<path d="M2313.2,416.8c-20.5-12-43-18-67.6-18c-24.9,0-47.6,5.9-68,17.6s-36.5,27.7-48.2,48c-11.7,20.3-17.6,42.7-17.6,67.3 c0.3,25.2,6.2,47.8,17.8,68c11.5,20.2,28,36,49.3,47.6c21.3,11.5,45.9,17.3,73.8,17.3c48.6,0,86.8-14.7,114.7-44l-52.5-48.9 c-8.6,8.3-17.6,14.6-26.7,19c-9.3,4.3-21.1,6.4-35.3,6.4c-11.6,0-22.5-3.6-32.7-10.9c-10.3-7.3-17.1-16.5-20.7-27.8h180l0.4-11.6 c0-29.6-6-55.7-18-78.2S2333.6,428.8,2313.2,416.8z M2199.3,503.2c2.1-12.1,7.5-21.8,16.2-29.1s18.7-10.9,30-10.9 s21.2,3.6,29.8,10.9c8.6,7.2,13.9,16.9,16,29.1H2199.3z" transform="translate(0)"/>
<path d="M2583.6,507.7c-13.8-4.4-30.6-8.1-50.5-11.1c-15.1-2.7-26.1-5.2-32.9-7.6c-6.8-2.4-10.2-6.1-10.2-11.1s2.3-8.7,6.7-10.9 c4.4-2.2,11.5-3.3,21.3-3.3c11.6,0,24.3,2.4,38.1,7.2c13.9,4.8,26.2,11,36.9,18.4l32.4-58.2c-11.3-7.4-26.2-14.7-44.9-21.8 c-18.7-7.1-39.6-10.7-62.7-10.7c-33.7,0-60.2,7.6-79.3,22.7c-19.1,15.1-28.7,36.1-28.7,63.1c0,19,4.8,33.9,14.4,44.7 c9.6,10.8,21,18.5,34,22.9c13.1,4.5,28.9,8.3,47.6,11.6c14.6,2.7,25.1,5.3,31.6,7.8s9.8,6.5,9.8,11.8c0,10.4-9.7,15.6-29.3,15.6 c-13.7,0-28.5-2.3-44.7-6.9c-16.1-4.6-29.2-11.3-39.3-20.2l-33.3,60c9.2,7.4,24.6,14.7,46.2,22c21.7,7.3,45.2,10.9,70.7,10.9 c34.7,0,62.9-7.4,84.5-22.4c21.7-15,32.5-37.3,32.5-66.9c0-19.3-5-34.2-15.1-44.9S2597.4,512.1,2583.6,507.7z" transform="translate(0)"/>
<path d="M2883.4,575.3c0-19.3-5-34.2-15.1-44.9s-22-18.3-35.8-22.7c-13.8-4.4-30.6-8.1-50.5-11.1c-15.1-2.7-26.1-5.2-32.9-7.6 c-6.8-2.4-10.2-6.1-10.2-11.1s2.3-8.7,6.7-10.9c4.4-2.2,11.5-3.3,21.3-3.3c11.6,0,24.3,2.4,38.1,7.2c13.9,4.8,26.2,11,36.9,18.4 l32.4-58.2c-11.3-7.4-26.2-14.7-44.9-21.8c-18.7-7.1-39.6-10.7-62.7-10.7c-33.7,0-60.2,7.6-79.3,22.7 c-19.1,15.1-28.7,36.1-28.7,63.1c0,19,4.8,33.9,14.4,44.7c9.6,10.8,21,18.5,34,22.9c13.1,4.5,28.9,8.3,47.6,11.6 c14.6,2.7,25.1,5.3,31.6,7.8s9.8,6.5,9.8,11.8c0,10.4-9.7,15.6-29.3,15.6c-13.7,0-28.5-2.3-44.7-6.9c-16.1-4.6-29.2-11.3-39.3-20.2 l-33.3,60c9.2,7.4,24.6,14.7,46.2,22c21.7,7.3,45.2,10.9,70.7,10.9c34.7,0,62.9-7.4,84.5-22.4 C2872.6,627.2,2883.4,604.9,2883.4,575.3z" transform="translate(0)"/>
<rect x="2460.7" y="738.7" width="59.6" height="17.2" transform="translate(0)"/>
<path d="M2596.5,706.4c-5.7,0-11,1-15.8,3s-9,5-12.5,8.9v-9.4h-19.4v93.6h19.4v-52c0-8.6,2.1-15.3,6.3-20c4.2-4.7,9.5-7.1,15.9-7.1 c7.8,0,13.4,2.3,16.8,6.7c3.4,4.5,5.1,11.3,5.1,20.5v52h19.4v-56.8c0-12.8-3.2-22.6-9.5-29.3 C2615.8,709.8,2607.3,706.4,2596.5,706.4z" transform="translate(0)"/>
<path d="M2733.8,717.7c-3.6-3.4-7.9-6.1-13.1-8.2s-10.6-3.1-16.2-3.1c-8.7,0-16.5,2.1-23.5,6.3s-12.5,10-16.5,17.3 c-4,7.3-6,15.4-6,24.4c0,8.9,2,17.1,6,24.3c4,7.3,9.5,13,16.5,17.2s14.9,6.3,23.5,6.3c5.6,0,11-1,16.2-3.1 c5.1-2.1,9.5-4.8,13.1-8.2v24.4c0,8.5-2.5,14.8-7.6,18.7c-5,3.9-11,5.9-18,5.9c-6.7,0-12.4-1.6-17.3-4.7c-4.8-3.1-7.6-7.7-8.3-13.8 h-19.4c0.6,7.7,2.9,14.2,7.1,19.5s9.6,9.3,16.2,12c6.6,2.7,13.8,4,21.7,4c12.8,0,23.5-3.4,32-10.1c8.6-6.7,12.8-17.1,12.8-31.1 V708.9h-19.2V717.7z M2732.2,770.1c-2.5,4.7-6,8.3-10.4,11.2c-4.4,2.7-9.4,4-14.9,4c-5.7,0-10.8-1.4-15.2-4.3s-7.8-6.7-10.2-11.4 c-2.3-4.8-3.5-9.8-3.5-15.2c0-5.5,1.1-10.6,3.5-15.3s5.8-8.5,10.2-11.3s9.5-4.2,15.2-4.2c5.5,0,10.5,1.4,14.9,4s7.9,6.3,10.4,11 s3.8,10,3.8,15.8S2734.7,765.4,2732.2,770.1z" transform="translate(0)"/>
<polygon points="2867.9,708.9 2846.5,708.9 2820.9,741.9 2795.5,708.9 2773.1,708.9 2809.1,755 2771.5,802.5 2792.9,802.5 2820.1,767.9 2847.2,802.6 2869.6,802.6 2832,754.4 " transform="translate(0)"/>
<path d="M757.6,293.7c-20-10.8-42.6-16.2-67.8-16.2H600c-8.5,39.2-21.1,76.4-37.6,111.3c-9.9,20.8-21.1,40.6-33.6,59.4v207.2h88.9 V521.5h72c25.2,0,47.8-5.4,67.8-16.2s35.7-25.6,47.1-44.2c11.4-18.7,17.1-39.1,17.1-61.3c0.1-22.7-5.6-43.3-17-61.9 C793.3,319.2,777.6,304.5,757.6,293.7z M716.6,434.3c-9.3,8.9-21.6,13.3-36.7,13.3l-62.2,0.4v-92.5l62.2-0.4 c15.1,0,27.3,4.4,36.7,13.3c9.4,8.9,14,19.9,14,32.9C730.6,414.5,726,425.4,716.6,434.3z" transform="translate(0)"/>
</g>
</svg>

<p>Please sign in.</p>

<div class="form-floating form-stacked-top">
<input type="text" name="login" id="inputUsername" placeholder="Username" class="form-control" autocorrect="off" autocapitalize="none" required autofocus>
<label for="inputUsername">Username</label>
</div>
<div class="form-floating form-stacked-bottom">
<input type="password" name="password" id="inputPassword" placeholder="Password" class="form-control" required>
<label for="inputPassword">Password</label>
</div>
<div class="d-grid mt-3">
<button class="btn btn-lg btn-primary" type="submit">Sign in</button>
</div>
</form>
</div>
</body>
</html>
llm.py
@@ -4,15 +4,20 @@ from ollama import Client
 from openai import OpenAI

 import logging
+from dotenv import load_dotenv
+
+load_dotenv()

 logging.basicConfig(level=logging.INFO)

+TRY_OLLAMA = os.getenv("TRY_OLLAMA", False)
+

 class LLMClient:
     def __init__(self):
         try:
             self.ollama_client = Client(
-                host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=10.0
+                host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=1.0
             )
             self.ollama_client.chat(
                 model="gemma3:4b", messages=[{"role": "system", "content": "test"}]
@@ -30,7 +35,9 @@ class LLMClient:
         prompt: str,
         system_prompt: str,
     ):
+        # Instituting a fallback if my gaming PC is not on
         if self.PROVIDER == "ollama":
+            try:
                 response = self.ollama_client.chat(
                     model="gemma3:4b",
                     messages=[
@@ -41,9 +48,11 @@ class LLMClient:
                         {"role": "user", "content": prompt},
                     ],
                 )
-            print(response)
                 output = response.message.content
-        elif self.PROVIDER == "openai":
+                return output
+            except Exception as e:
+                logging.error(f"Could not connect to OLLAMA: {str(e)}")
+
         response = self.openai_client.responses.create(
             model="gpt-4o-mini",
             input=[
main.py
@@ -1,22 +1,20 @@
+import argparse
 import datetime
 import logging
 import os
 import sqlite3
+import time

-import argparse
-import chromadb
 import ollama

-from request import PaperlessNGXService
-from chunker import Chunker
-from cleaner import pdf_to_image, summarize_pdf_image
-from llm import LLMClient
-from query import QueryGenerator
-
 from dotenv import load_dotenv

+import chromadb
+from utils.chunker import Chunker
+from utils.cleaner import pdf_to_image, summarize_pdf_image
+from llm import LLMClient
+from scripts.query import QueryGenerator
+from utils.request import PaperlessNGXService
+
 _dotenv_loaded = load_dotenv()

 # Configure ollama client with URL from environment or default to localhost
@@ -36,6 +34,7 @@ parser.add_argument("query", type=str, help="questions about simba's health")
 parser.add_argument(
     "--reindex", action="store_true", help="re-index the simba documents"
 )
+parser.add_argument("--classify", action="store_true", help="test classification")
 parser.add_argument("--index", help="index a file")

 ppngx = PaperlessNGXService()
@@ -77,7 +76,7 @@ def chunk_data(docs, collection, doctypes):

     logging.info(f"chunking {len(docs)} documents")
     texts: list[str] = [doc["content"] for doc in docs]
-    with sqlite3.connect("visited.db") as conn:
+    with sqlite3.connect("database/visited.db") as conn:
         to_insert = []
         c = conn.cursor()
         for index, text in enumerate(texts):
@@ -113,9 +112,22 @@ def chunk_text(texts: list[str], collection):
     )


-def consult_oracle(input: str, collection):
-    import time
+def classify_query(query: str, transcript: str) -> bool:
+    logging.info("Starting query generation")
+    qg_start = time.time()
+    qg = QueryGenerator()
+    query_type = qg.get_query_type(input=query, transcript=transcript)
+    logging.info(query_type)
+    qg_end = time.time()
+    logging.info(f"Query generation took {qg_end - qg_start:.2f} seconds")
+    return query_type == "Simba"
+
+
+def consult_oracle(
+    input: str,
+    collection,
+    transcript: str = "",
+):
     chunker = Chunker(collection)

     start_time = time.time()
@@ -153,7 +165,10 @@ def consult_oracle(input: str, collection):
     logging.info("Starting LLM generation")
     llm_start = time.time()
     system_prompt = "You are a helpful assistant that understands veterinary terms."
-    prompt = f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}"
+    transcript_prompt = f"Here is the message transcript thus far {transcript}."
+    prompt = f"""Using the following data, help answer the user's query by providing as many details as possible.
+    Using this data: {results}. {transcript_prompt if len(transcript) > 0 else ""}
+    Respond to this prompt: {input}"""
     output = llm_client.chat(prompt=prompt, system_prompt=system_prompt)
     llm_end = time.time()
     logging.info(f"LLM generation took {llm_end - llm_start:.2f} seconds")
@@ -164,6 +179,16 @@ def consult_oracle(input: str, collection):
     return output


+def llm_chat(input: str, transcript: str = "") -> str:
+    system_prompt = "You are a helpful assistant that understands veterinary terms."
+    transcript_prompt = f"Here is the message transcript thus far {transcript}."
+    prompt = f"""Answer the user as if you were a cat named Simba. Don't act too catlike. Be assertive.
+    {transcript_prompt if len(transcript) > 0 else ""}
+    Respond to this prompt: {input}"""
+    output = llm_client.chat(prompt=prompt, system_prompt=system_prompt)
+    return output
+
+
 def paperless_workflow(input):
     # Step 1: Get the text
     ppngx = PaperlessNGXService()
@@ -173,15 +198,24 @@ def paperless_workflow(input):
     consult_oracle(input, simba_docs)


-def consult_simba_oracle(input: str):
+def consult_simba_oracle(input: str, transcript: str = ""):
+    is_simba_related = classify_query(query=input, transcript=transcript)
+
+    if is_simba_related:
+        logging.info("Query is related to simba")
         return consult_oracle(
             input=input,
             collection=simba_docs,
+            transcript=transcript,
         )

+    logging.info("Query is NOT related to simba")
+
+    return llm_chat(input=input, transcript=transcript)
+

 def filter_indexed_files(docs):
-    with sqlite3.connect("visited.db") as conn:
+    with sqlite3.connect("database/visited.db") as conn:
         c = conn.cursor()
         c.execute(
             "CREATE TABLE IF NOT EXISTS indexed_documents (id INTEGER PRIMARY KEY AUTOINCREMENT, paperless_id INTEGER)"
@@ -194,12 +228,16 @@ def filter_indexed_files(docs):
     return [doc for doc in docs if doc["id"] not in visited]


-if __name__ == "__main__":
-    args = parser.parse_args()
-    if args.reindex:
-        with sqlite3.connect("./visited.db") as conn:
-            c = conn.cursor()
-            c.execute("DELETE FROM indexed_documents")
+def reindex():
+    with sqlite3.connect("database/visited.db") as conn:
+        c = conn.cursor()
+        c.execute("DELETE FROM indexed_documents")
+        conn.commit()
+
+    # Delete all documents from the collection
+    all_docs = simba_docs.get()
+    if all_docs["ids"]:
+        simba_docs.delete(ids=all_docs["ids"])
+
     logging.info("Fetching documents from Paperless-NGX")
     ppngx = PaperlessNGXService()
@@ -215,21 +253,20 @@ if __name__ == "__main__":

     # Chunk documents
     logging.info("Chunking documents now ...")
-    tag_lookup = ppngx.get_tags()
     doctype_lookup = ppngx.get_doctypes()
     chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
     logging.info("Done chunking documents")

-    # if args.index:
-    #     with open(args.index) as file:
-    #         extension = args.index.split(".")[-1]
-    #         if extension == "pdf":
-    #             pdf_path = ppngx.download_pdf_from_id(id=document_id)
-    #             image_paths = pdf_to_image(filepath=pdf_path)
-    #             print(f"summarizing {file}")
-    #             generated_summary = summarize_pdf_image(filepaths=image_paths)
-    #         elif extension in [".md", ".txt"]:
-    #             chunk_text(texts=[file.readall()], collection=simba_docs)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    if args.reindex:
+        reindex()
+
+    if args.classify:
+        consult_simba_oracle(input="yohohoho testing")
+        consult_simba_oracle(input="write an email")
+        consult_simba_oracle(input="how much does simba weigh")

     if args.query:
         logging.info("Consulting oracle ...")
(deleted file: initial migration)
@@ -1,63 +0,0 @@
from tortoise import BaseDBAsyncClient

RUN_IN_TRANSACTION = True


async def upgrade(db: BaseDBAsyncClient) -> str:
    return """
        CREATE TABLE IF NOT EXISTS "conversations" (
            "id" CHAR(36) NOT NULL PRIMARY KEY,
            "name" VARCHAR(255) NOT NULL,
            "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
        CREATE TABLE IF NOT EXISTS "conversation_messages" (
            "id" CHAR(36) NOT NULL PRIMARY KEY,
            "text" TEXT NOT NULL,
            "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "speaker" VARCHAR(10) NOT NULL /* USER: user\nSIMBA: simba */,
            "conversation_id" CHAR(36) NOT NULL REFERENCES "conversations" ("id") ON DELETE CASCADE
        );
        CREATE TABLE IF NOT EXISTS "users" (
            "id" CHAR(36) NOT NULL PRIMARY KEY,
            "username" VARCHAR(255) NOT NULL,
            "password" BLOB NOT NULL,
            "email" VARCHAR(100) NOT NULL UNIQUE,
            "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
        CREATE TABLE IF NOT EXISTS "aerich" (
            "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
            "version" VARCHAR(255) NOT NULL,
            "app" VARCHAR(100) NOT NULL,
            "content" JSON NOT NULL
        );"""


async def downgrade(db: BaseDBAsyncClient) -> str:
    return """
    """


MODELS_STATE = (
"eJztmG1v4jgQx79KlFddaa9q2W53VZ1OCpTecrvACcLdPtwqMskAVhMnazvboorvfrbJE4"
"kJpWq3UPGmhRkPtn8ztv/2nRmEHvjsuBWSn0AZ4jgk5oVxZxIUgPig9b82TBRFuVcaOBr7"
"KsAttFQeNGacIpcL5wT5DITJA+ZSHCWdkdj3pTF0RUNMprkpJvhHDA4Pp8BnQIXj23dhxs"
"SDW2Dp1+jamWDwvZVxY0/2rewOn0fKNhp1Lq9US9nd2HFDPw5I3jqa81lIsuZxjL1jGSN9"
"UyBAEQevMA05ymTaqWk5YmHgNIZsqF5u8GCCYl/CMH+fxMSVDAzVk/xz9oe5BR6BWqLFhE"
"sWd4vlrPI5K6spu2p9sAZHb85fqVmGjE+pcioi5kIFIo6WoYprDlL9r6BszRDVo0zbl2CK"
"gT4EY2rIOeY1lIJMAT2MmhmgW8cHMuUz8bXx9m0Nxn+sgSIpWimUoajrZdX3Eldj6ZNIc4"
"QuBTllB/EqyEvh4TgAPczVyBJSLwk9Tj/sKGAxB69P/HmyCGr42p1ue2hb3b/lTALGfvgK"
"kWW3paehrPOS9ei8lIrsR4x/O/YHQ341vvZ77XLtZ+3sr6YcE4p56JDwxkFeYb2m1hTMSm"
"LjyHtgYlcjD4l91sSqwcuTZHJd2AKlYYzc6xtEPWfFUzgdgTE0BVZNfzOJvPo4AD87NkuJ"
"1hyu3eUv7mbGF2kZp9YivLARrqNXdQWNoGxBRMzbS/qWPdXQ2aBQChDvJ1ScYiIPgmWvBQ"
"uHW812bAurHmXafl8ES9022/5sr+ywqSw56lqfX63ssp/6vT/T5gUZ0/rUbx7Uy0s85Krq"
"hUWAroHqxX2bxIHKakfgQMSFSnYL4c+8dMzRsD24MGIG9D8y7HSb1oXBcDBG5gNuAKcn97"
"gAnJ6s1f/SVVpAxYNmu21eE/qYe/6zblYbtviKHtMDrdK8CingKfkI80r9bpZfO02xoruE"
"maKbTEzoykV8EJMEvlzY1rBlXbbNxXpt+5RKbsSUJKpIN2Wv1WpyaR+02f5rM5nHbR+Uij"
"H7otF+waNShBi7CammMpuYIDrXwyxGlWCO53x5/9k9nDX0mlKwFvWWYNbs9KzBF73mTdsX"
"C7f5xW5bJbwQIOxvU6ZZwOPU6OYl/5gVenpyP9VTJ3uquudwcXiZF4fDs+eLSOy2z55PKQ"
"0toNid6cRh4qmVhyhvszP6sEPWvDdp5aHU9KVqTxL2rIeEemr9rXF69u7s/Zvzs/eiiRpJ"
"ZnlXU/2dnr1BDsrLivYOt/6YLYQcxGAGUi6NLSAmzfcT4NNolZBwIJrz7K9hv7f2bSYNKY"
"EcETHBbx52+WvDx4x/302sNRTlrOsfkstvxqXDSP5AU/eK8yuPl8X/Etg7Fw=="
)
(deleted file: migration adding user_id to conversations)
@@ -1,60 +0,0 @@
from tortoise import BaseDBAsyncClient

RUN_IN_TRANSACTION = True


async def upgrade(db: BaseDBAsyncClient) -> str:
    return """
        -- SQLite doesn't support ADD CONSTRAINT, so we need to recreate the table
        CREATE TABLE "conversations_new" (
            "id" CHAR(36) NOT NULL PRIMARY KEY,
            "name" VARCHAR(255) NOT NULL,
            "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "user_id" CHAR(36),
            FOREIGN KEY ("user_id") REFERENCES "users" ("id") ON DELETE CASCADE
        );
        INSERT INTO "conversations_new" ("id", "name", "created_at", "updated_at")
        SELECT "id", "name", "created_at", "updated_at" FROM "conversations";
        DROP TABLE "conversations";
        ALTER TABLE "conversations_new" RENAME TO "conversations";"""


async def downgrade(db: BaseDBAsyncClient) -> str:
    return """
        -- Recreate table without user_id column
        CREATE TABLE "conversations_new" (
            "id" CHAR(36) NOT NULL PRIMARY KEY,
            "name" VARCHAR(255) NOT NULL,
            "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
        INSERT INTO "conversations_new" ("id", "name", "created_at", "updated_at")
        SELECT "id", "name", "created_at", "updated_at" FROM "conversations";
        DROP TABLE "conversations";
        ALTER TABLE "conversations_new" RENAME TO "conversations";"""


MODELS_STATE = (
"eJztmWtP2zAUhv9KlE8gbQg6xhCaJqWlbB20ndp0F9gUuYnbWiROiJ1Bhfjvs91cnMRNKe"
"PSon6B9vic2H5s57w+vdU934Eu2Wn4+C8MCaDIx/qRdqtj4EH2Qdn+RtNBEGSt3EDB0BUB"
"tuQpWsCQ0BDYlDWOgEsgMzmQ2CEK4s5w5Lrc6NvMEeFxZoowuoqgRf0xpBMYsoaLP8yMsA"
"NvIEm+BpfWCEHXyY0bObxvYbfoNBC2waB1fCI8eXdDy/bdyMOZdzClEx+n7lGEnB0ew9vG"
"EMMQUOhI0+CjjKedmGYjZgYaRjAdqpMZHDgCkcth6B9HEbY5A030xP/sf9KXwMNQc7QIU8"
"7i9m42q2zOwqrzrhpfjN7Wu4NtMUuf0HEoGgUR/U4EAgpmoYJrBlL8L6FsTECoRpn4F2Cy"
"gT4EY2LIOGZ7KAGZAHoYNd0DN5YL8ZhO2Nfa+/cVGL8bPUGSeQmUPtvXs13fiZtqszaONE"
"Noh5BP2QK0DPKYtVDkQTXMfGQBqROH7iQfVhQwm4PTxe40PgQVfM1Wu9k3jfY3PhOPkCtX"
"IDLMJm+pCeu0YN06KCxF+hDtR8v8ovGv2nm30yzu/dTPPNf5mEBEfQv71xZwpPOaWBMwuY"
"WNAueBC5uP3Czsiy5sPHhpXQkMreUyiBTyH2kkHtszLuLDkwZPvaNLZc7gMMrwTvwQojE+"
"hVOBsMXGAbCtShax6BjEj1lVaJk1G0UIrlM1Im8KNjs2J0hn2dPoN4zjpi4YDoF9eQ1Cx5"
"oD04OEgDEkZaD1OPLktAfdVJqpWcoCrj174mq+VeaxFaz8mi8xytErN3k1r2gBmM3bifvm"
"PVXQWaCCJYj3E8OWvJAbUbzWopjCG0XKN5lVjTLxXxdRXJXKmz/NXBZPpO9W2/i5ncvkZ9"
"3O58RdksqNs259o5Bfo5AqK2QSQHCpEgP8AtnEkVeSArnVlcJf+Ojog36zd6TxjP4b91vt"
"unGkEeQNgX6/Jc7dMvd273HJ3Nude8fkTYUDJCea5V7zitDHfOevqYS1CwWv/5SyxfrZyl"
"JcqGkV22VZbfuUSk7cGRTSLblLzNdq/GhvtNn6azO+jssWLeWYddFoz1C4DAAh136o2Jl1"
"hEE4VcOUowowh1M6u/+sHs4KenUuWGW9xZjVWx2j90uteRN/eePWf5lNo4AXegC5y2zTNO"
"Bx9ujiI/+YO3Rv936qp0r2lHXP5uLwOi8Om9L6q1jYtHJXEoCLyp6l35Efp/a5VvXkJ615"
"GjBE9kRXaOW4pVItg8xnZeRyC88pvynVMsdc2Azxyr9ozhSV57e1vf0P+4fvDvYPmYsYSW"
"r5UPEyaHXMBeqYHwTllXa+6pBCNto4BcmPxhIQY/f1BPg00s3HFGJFev/a73bmlqqSkALI"
"AWYTvHCQTd9oLiL0z2piraDIZ11dVy+W0Au5mT+gripqPWch5u4f/FVgYA=="
)
migrations/models/1_20260131214411_None.py (new file, 72 lines)
@@ -0,0 +1,72 @@
from tortoise import BaseDBAsyncClient

RUN_IN_TRANSACTION = True


async def upgrade(db: BaseDBAsyncClient) -> str:
    return """
        CREATE TABLE IF NOT EXISTS "users" (
            "id" UUID NOT NULL PRIMARY KEY,
            "username" VARCHAR(255) NOT NULL,
            "password" BYTEA,
            "email" VARCHAR(100) NOT NULL UNIQUE,
            "oidc_subject" VARCHAR(255) UNIQUE,
            "auth_provider" VARCHAR(50) NOT NULL DEFAULT 'local',
            "ldap_groups" JSONB NOT NULL,
            "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
        );
        CREATE INDEX IF NOT EXISTS "idx_users_oidc_su_5aec5a" ON "users" ("oidc_subject");
        CREATE TABLE IF NOT EXISTS "conversations" (
            "id" UUID NOT NULL PRIMARY KEY,
            "name" VARCHAR(255) NOT NULL,
            "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "updated_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "user_id" UUID REFERENCES "users" ("id") ON DELETE CASCADE
        );
        CREATE TABLE IF NOT EXISTS "conversation_messages" (
            "id" UUID NOT NULL PRIMARY KEY,
            "text" TEXT NOT NULL,
            "created_at" TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
            "speaker" VARCHAR(10) NOT NULL,
            "conversation_id" UUID NOT NULL REFERENCES "conversations" ("id") ON DELETE CASCADE
        );
        COMMENT ON COLUMN "conversation_messages"."speaker" IS 'USER: user\nSIMBA: simba';
        CREATE TABLE IF NOT EXISTS "aerich" (
            "id" SERIAL NOT NULL PRIMARY KEY,
            "version" VARCHAR(255) NOT NULL,
            "app" VARCHAR(100) NOT NULL,
            "content" JSONB NOT NULL
        );"""


async def downgrade(db: BaseDBAsyncClient) -> str:
    return """
    """


MODELS_STATE = (
"eJztmm1v4jgQx78Kyquu1KtatnRX1emkQOkttwuceNinXhWZ2ICviZ1NnG1R1e9+tkmIkz"
"gUKFDY401bxh5s/zzO/Mfpo+FSiJzgpEbJT+QHgGFKjMvSo0GAi/gf2vbjkgE8L2kVBgYG"
"jnSwlZ6yBQwC5gOb8cYhcALETRAFto+9aDASOo4wUpt3xGSUmEKCf4TIYnSE2Bj5vOHmlp"
"sxgegBBfFH784aYuTA1LwxFGNLu8UmnrT1+42ra9lTDDewbOqELkl6exM2pmTWPQwxPBE+"
"om2ECPIBQ1BZhphltOzYNJ0xNzA/RLOpwsQA0RCEjoBh/D4MiS0YlORI4sf5H8YSeDhqgR"
"YTJlg8Pk1XlaxZWg0xVO2D2Tl6e/FGrpIGbOTLRknEeJKOgIGpq+SagJS/cyhrY+DrUcb9"
"MzD5RFfBGBsSjkkMxSBjQKtRM1zwYDmIjNiYfyxXKnMwfjY7kiTvJVFSHtfTqG9FTeVpm0"
"CaILR9JJZsAZYHecVbGHaRHmbaM4MURq4n8R87CpivAbaJM4kOwRy+vUaz3u2Zzb/FStwg"
"+OFIRGavLlrK0jrJWI8uMlsx+5LSl0bvQ0l8LH1vt+rZ2J/16303xJxAyKhF6L0FoHJeY2"
"sMJrWxoQdX3Ni052FjX3Vjo8kr+xog31ougyguL0gj0dy2uImrJw2Reod32pwhYOThXVMf"
"4RH5iCYSYYPPAxBblywi0dGPvmZXoSXWZBY+uJ+pETUo+Or4mhCbZk+zWzOv6oZkOAD23T"
"3woVUA00VBAEYoyAOtRp7XHzvImUkzPUtVwDWn37ibT5UitpIVLVOFUYpevsktu1kLIHzd"
"MBpbjDSHzjMqWIG4mBi21I08iOK9FsUMPWhSfo9b9Sjj/vsiiuel8vrXXiqLx9L3qGl+fZ"
"PK5J/arT/j7opUrn1qVw8K+VcUUnmFHHgI3OnEgCgg6yR0c1IgtbuK+ysfHaPfrXcuSyKj"
"/0O6jWbVvCwF2B0AY7EtTlWZZ6cLFJlnp4U1pmjKHCA10Sz3mNe4rvOZv6cS1s5ceL1Qym"
"bvz3aW4rOaVhMuy2rbTSo5WTNopFtcSxRrNXG0D9ps/7WZ2MdlLy1Vn33RaFu4uPRAENxT"
"XxOZVUyAP9HDVL0yMAcTNq1/drWk18GrCr2qyi2OrNpomZ1veskb91fjtvqtVzczdJELsL"
"NMlM4c1hOiz5/4dQbo2eliomee6snJHoqhbQXh4F9kayqHYpJZv5WAZoN0uzw3cuC5lh9b"
"nk9/Ylgk2vVAc47be4oaDrWB84I0lOZaWSRMK8VRWskFqQOBZ418GnqaO7y/uu2WHmnGLQ"
"O0T/gqbyC22XHJwQG73Rjem9vNpHix8vkXCdk7g8wzVXzB4SLhf3KRcHjV9kts7OwmP1cQ"
"PvcaJPd/Jet5F7LLYnS770BM5GN7bGhq56jleF71DJI+O1M+N0jBdby2ehaYM8EQ7fyrim"
"j5Juq38tn5u/P3by/O3/MuciYzy7s5D4NGq/dMtSwOgvaKq1jrKS6HWjmRzvxoLCOYp933"
"E+BGajk+IkNEk96LJbLi8lryeGO3DmuTx0tk2/Wnl6f/AHvgrXs="
)
mkdocs.yml (new file, 25 lines)
@@ -0,0 +1,25 @@
site_name: SimbaRAG Documentation
site_description: Documentation for SimbaRAG - RAG-powered conversational AI

theme:
  name: material
  features:
    - content.code.copy
    - navigation.sections
    - navigation.expand

markdown_extensions:
  - admonition
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.superfences
  - pymdownx.tabbed:
      alternate_style: true
  - tables
  - toc:
      permalink: true

nav:
  - Home: index.md
  - Architecture:
      - Authentication: authentication.md
(pyproject.toml)
@@ -4,7 +4,39 @@ version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
-dependencies = ["chromadb>=1.1.0", "python-dotenv>=1.0.0", "flask>=3.1.2", "httpx>=0.28.1", "ollama>=0.6.0", "openai>=2.0.1", "pydantic>=2.11.9", "pillow>=10.0.0", "pymupdf>=1.24.0", "black>=25.9.0", "pillow-heif>=1.1.1", "flask-jwt-extended>=4.7.1", "bcrypt>=5.0.0", "pony>=0.7.19", "flask-login>=0.6.3", "quart>=0.20.0", "tortoise-orm>=0.25.1", "quart-jwt-extended>=0.1.0", "pre-commit>=4.3.0", "tortoise-orm-stubs>=1.0.2", "aerich>=0.8.0", "tomlkit>=0.13.3"]
+dependencies = [
+    "chromadb>=1.1.0",
+    "python-dotenv>=1.0.0",
+    "flask>=3.1.2",
+    "httpx>=0.28.1",
+    "ollama>=0.6.0",
+    "openai>=2.0.1",
+    "pydantic>=2.11.9",
+    "pillow>=10.0.0",
+    "pymupdf>=1.24.0",
+    "black>=25.9.0",
+    "pillow-heif>=1.1.1",
+    "flask-jwt-extended>=4.7.1",
+    "bcrypt>=5.0.0",
+    "pony>=0.7.19",
+    "flask-login>=0.6.3",
+    "quart>=0.20.0",
+    "tortoise-orm>=0.25.1",
+    "quart-jwt-extended>=0.1.0",
+    "pre-commit>=4.3.0",
+    "tortoise-orm-stubs>=1.0.2",
+    "aerich>=0.8.0",
+    "tomlkit>=0.13.3",
+    "authlib>=1.3.0",
+    "asyncpg>=0.30.0",
+    "langchain-openai>=1.1.6",
+    "langchain>=1.2.0",
+    "langchain-chroma>=1.0.0",
+    "langchain-community>=0.4.1",
+    "jq>=1.10.0",
+    "langchain-ollama>=1.0.1",
+    "tavily-python>=0.7.17",
+]

 [tool.aerich]
 tortoise_orm = "app.TORTOISE_CONFIG"
raggr-frontend/.dockerignore (new file, 9 lines)
@@ -0,0 +1,9 @@
.git
.gitignore
README.md
.DS_Store
node_modules
dist
.cache
coverage
*.log

raggr-frontend/.gitignore (vendored)
@@ -6,6 +6,7 @@
 # Dist
 node_modules
 dist/
+.yarn

 # Profile
 .rspack-profile-*/

raggr-frontend/.yarnrc.yml (new file, 1 line)
@@ -0,0 +1 @@
nodeLinker: node-modules

raggr-frontend/Dockerfile.dev (new file, 18 lines)
@@ -0,0 +1,18 @@
FROM node:20-slim

WORKDIR /app

# Copy package files
COPY package.json yarn.lock* ./

# Install dependencies
RUN yarn install

# Copy application source code
COPY . .

# Expose rsbuild dev server port (default 3000)
EXPOSE 3000

# Default command
CMD ["sh", "-c", "yarn build && yarn watch:build"]

raggr-frontend/package-lock.json (generated, new file, 2677 lines)
File diff suppressed because it is too large.
(raggr-frontend/package.json)
@@ -6,21 +6,37 @@
   "scripts": {
     "build": "rsbuild build",
     "dev": "rsbuild dev --open",
-    "preview": "rsbuild preview"
+    "preview": "rsbuild preview",
+    "watch": "npm-watch build",
+    "watch:build": "rsbuild build --watch"
   },
   "dependencies": {
     "axios": "^1.12.2",
     "marked": "^16.3.0",
+    "npm-watch": "^0.13.0",
     "react": "^19.1.1",
     "react-dom": "^19.1.1",
-    "react-markdown": "^10.1.0"
+    "react-markdown": "^10.1.0",
+    "watch": "^1.0.2"
   },
   "devDependencies": {
+    "@biomejs/biome": "2.3.10",
     "@rsbuild/core": "^1.5.6",
     "@rsbuild/plugin-react": "^1.4.0",
     "@tailwindcss/postcss": "^4.0.0",
     "@types/react": "^19.1.13",
     "@types/react-dom": "^19.1.9",
     "typescript": "^5.9.2"
+  },
+  "watch": {
+    "build": {
+      "patterns": [
+        "src"
+      ],
+      "extensions": "ts,tsx,css,js,jsx",
+      "delay": 1000,
+      "quiet": false,
+      "inherit": true
+    }
   }
 }
(rsbuild config)
@@ -3,4 +3,8 @@ import { pluginReact } from '@rsbuild/plugin-react';

 export default defineConfig({
   plugins: [pluginReact()],
+  html: {
+    title: 'Raggr',
+    favicon: './src/assets/favicon.svg',
+  },
 });

(global stylesheet)
@@ -3,4 +3,5 @@
 body {
   margin: 0;
   font-family: Inter, Avenir, Helvetica, Arial, sans-serif;
+  background-color: #F9F5EB;
 }
(AppContainer component)
@@ -24,7 +24,7 @@ const AppContainer = () => {

     // Try to verify token by making a request
     try {
-      await conversationService.getMessages();
+      await conversationService.getAllConversations();
       // If successful, user is authenticated
       setAuthenticated(true);
     } catch (error) {
raggr-frontend/src/api/conversationService.ts
@@ -10,9 +10,10 @@ interface Message {
 interface Conversation {
   id: string;
   name: string;
-  messages: Message[];
+  messages?: Message[];
   created_at: string;
   updated_at: string;
+  user_id?: string;
 }

 interface QueryRequest {
@@ -23,15 +24,23 @@
   response: string;
 }

+interface CreateConversationRequest {
+  user_id: string;
+}
+
 class ConversationService {
   private baseUrl = "/api";
+  private conversationBaseUrl = "/api/conversation";

-  async sendQuery(query: string): Promise<QueryResponse> {
+  async sendQuery(
+    query: string,
+    conversation_id: string,
+  ): Promise<QueryResponse> {
     const response = await userService.fetchWithRefreshToken(
-      `${this.baseUrl}/query`,
+      `${this.conversationBaseUrl}/query`,
       {
         method: "POST",
-        body: JSON.stringify({ query }),
+        body: JSON.stringify({ query, conversation_id }),
       },
     );

@@ -56,6 +65,51 @@
     return await response.json();
   }

+  async getConversation(conversationId: string): Promise<Conversation> {
+    const response = await userService.fetchWithRefreshToken(
+      `${this.conversationBaseUrl}/${conversationId}`,
+      {
+        method: "GET",
+      },
+    );
+
+    if (!response.ok) {
+      throw new Error("Failed to fetch conversation");
+    }
+
+    return await response.json();
+  }
+
+  async createConversation(): Promise<Conversation> {
+    const response = await userService.fetchWithRefreshToken(
+      `${this.conversationBaseUrl}/`,
+      {
+        method: "POST",
+      },
+    );
+
+    if (!response.ok) {
+      throw new Error("Failed to create conversation");
+    }
+
+    return await response.json();
+  }
+
+  async getAllConversations(): Promise<Conversation[]> {
+    const response = await userService.fetchWithRefreshToken(
+      `${this.conversationBaseUrl}/`,
+      {
+        method: "GET",
+      },
+    );
+
+    if (!response.ok) {
+      throw new Error("Failed to fetch conversations");
+    }
+
+    return await response.json();
+  }
 }

 export const conversationService = new ConversationService();
raggr-frontend/src/api/oidcService.ts (new file, 94 lines)
@@ -0,0 +1,94 @@
/**
 * OIDC Authentication Service
 * Handles OAuth 2.0 Authorization Code flow with PKCE
 */

interface OIDCLoginResponse {
  auth_url: string;
}

interface OIDCCallbackResponse {
  access_token: string;
  refresh_token: string;
  user: {
    id: string;
    username: string;
    email: string;
  };
}

class OIDCService {
  private baseUrl = "/api/user/oidc";

  /**
   * Initiate OIDC login flow
   * Returns authorization URL to redirect user to
   */
  async initiateLogin(redirectAfterLogin: string = "/"): Promise<string> {
    const response = await fetch(
      `${this.baseUrl}/login?redirect=${encodeURIComponent(redirectAfterLogin)}`,
      {
        method: "GET",
        headers: { "Content-Type": "application/json" },
      }
    );

    if (!response.ok) {
      throw new Error("Failed to initiate OIDC login");
    }

    const data: OIDCLoginResponse = await response.json();
    return data.auth_url;
  }

  /**
   * Handle OIDC callback
   * Exchanges authorization code for tokens
   */
  async handleCallback(
    code: string,
    state: string
  ): Promise<OIDCCallbackResponse> {
    const response = await fetch(
      `${this.baseUrl}/callback?code=${encodeURIComponent(code)}&state=${encodeURIComponent(state)}`,
      {
        method: "GET",
        headers: { "Content-Type": "application/json" },
      }
    );

    if (!response.ok) {
      throw new Error("OIDC callback failed");
    }

    return await response.json();
  }

  /**
   * Extract OIDC callback parameters from URL
   */
  getCallbackParamsFromURL(): { code: string; state: string } | null {
    const params = new URLSearchParams(window.location.search);
    const code = params.get("code");
    const state = params.get("state");

    if (code && state) {
      return { code, state };
    }

    return null;
  }

  /**
   * Clear callback parameters from URL without reload
   */
  clearCallbackParams(): void {
    const url = new URL(window.location.href);
    url.searchParams.delete("code");
    url.searchParams.delete("state");
    url.searchParams.delete("error");
    window.history.replaceState({}, "", url.toString());
  }
}

export const oidcService = new OIDCService();
(userService.ts)
@@ -4,6 +4,7 @@ interface LoginResponse {
   user: {
     id: string;
     username: string;
+    email?: string;
   };
 }

@@ -55,6 +56,21 @@ class UserService {
     return data.access_token;
   }

+  async validateToken(): Promise<boolean> {
+    const refreshToken = localStorage.getItem("refresh_token");
+
+    if (!refreshToken) {
+      return false;
+    }
+
+    try {
+      await this.refreshToken();
+      return true;
+    } catch (error) {
+      return false;
+    }
+  }
+
   async fetchWithAuth(
     url: string,
     options: RequestInit = {},
raggr-frontend/src/assets/cat.png (new binary file, 5.8 KiB; binary file not shown)

raggr-frontend/src/assets/favicon.svg (new file, 3 lines, 163 B)
@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
  <text y="80" font-size="80" font-family="system-ui, -apple-system, sans-serif">🐱</text>
</svg>
(AnswerBubble component)
@@ -7,7 +7,7 @@ type AnswerBubbleProps = {

 export const AnswerBubble = ({ text, loading }: AnswerBubbleProps) => {
   return (
-    <div className="rounded-md bg-orange-100 p-3">
+    <div className="rounded-md bg-orange-100 p-3 sm:p-4 w-2/3">
       {loading ? (
         <div className="flex flex-col w-full animate-pulse gap-2">
           <div className="flex flex-row gap-2 w-full">
@@ -20,8 +20,10 @@ export const AnswerBubble = ({ text, loading }: AnswerBubbleProps) => {
           </div>
         </div>
       ) : (
-        <div className="flex flex-col">
-          <ReactMarkdown>{"🐈: " + text}</ReactMarkdown>
+        <div className=" flex flex-col break-words overflow-wrap-anywhere text-sm sm:text-base [&>*]:break-words">
+          <ReactMarkdown>
+            {"🐈: " + text}
+          </ReactMarkdown>
         </div>
       )}
     </div>
(ChatScreen component)
@@ -1,7 +1,10 @@
-import { useEffect, useState } from "react";
+import { useEffect, useState, useRef } from "react";
 import { conversationService } from "../api/conversationService";
 import { QuestionBubble } from "./QuestionBubble";
 import { AnswerBubble } from "./AnswerBubble";
+import { MessageInput } from "./MessageInput";
+import { ConversationList } from "./ConversationList";
+import catIcon from "../assets/cat.png";

 type Message = {
   text: string;
@@ -33,13 +36,88 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
   const [conversations, setConversations] = useState<Conversation[]>([
     { title: "simba meow meow", id: "uuid" },
   ]);
+  const [showConversations, setShowConversations] = useState<boolean>(false);
+  const [selectedConversation, setSelectedConversation] =
+    useState<Conversation | null>(null);
+  const [sidebarCollapsed, setSidebarCollapsed] = useState<boolean>(false);
+  const [isLoading, setIsLoading] = useState<boolean>(false);
+
+  const messagesEndRef = useRef<HTMLDivElement>(null);
   const simbaAnswers = ["meow.", "hiss...", "purrrrrr", "yowOWROWWowowr"];

+  const scrollToBottom = () => {
+    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
+  };
+
+  const handleSelectConversation = (conversation: Conversation) => {
+    setShowConversations(false);
+    setSelectedConversation(conversation);
+    const loadMessages = async () => {
+      try {
+        const fetchedConversation = await conversationService.getConversation(
+          conversation.id,
+        );
+        setMessages(
+          fetchedConversation.messages.map((message) => ({
+            text: message.text,
+            speaker: message.speaker,
+          })),
+        );
+      } catch (error) {
+        console.error("Failed to load messages:", error);
+      }
+    };
+    loadMessages();
+  };
+
+  const loadConversations = async () => {
+    try {
+      const fetchedConversations =
+        await conversationService.getAllConversations();
+      const parsedConversations = fetchedConversations.map((conversation) => ({
+        id: conversation.id,
+        title: conversation.name,
+      }));
+      setConversations(parsedConversations);
+      setSelectedConversation(parsedConversations[0]);
+      console.log(parsedConversations);
+      console.log("JELLYFISH@");
+    } catch (error) {
+      console.error("Failed to load messages:", error);
+    }
+  };
+
+  const handleCreateNewConversation = async () => {
+    const newConversation = await conversationService.createConversation();
+    await loadConversations();
+    setSelectedConversation({
+      title: newConversation.name,
+      id: newConversation.id,
+    });
+  };
+
+  useEffect(() => {
+    loadConversations();
+  }, []);
+
+  useEffect(() => {
+    scrollToBottom();
+  }, [messages]);
+
   useEffect(() => {
     const loadMessages = async () => {
+      console.log(selectedConversation);
+      console.log("JELLYFISH");
+      if (selectedConversation == null) return;
       try {
-        const conversation = await conversationService.getMessages();
+        const conversation = await conversationService.getConversation(
+          selectedConversation.id,
+        );
+        // Update the conversation title in case it changed
+        setSelectedConversation({
+          id: conversation.id,
+          title: conversation.name,
+        });
         setMessages(
           conversation.messages.map((message) => ({
             text: message.text,
@@ -51,11 +129,15 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
       }
     };
     loadMessages();
-  }, []);
+  }, [selectedConversation?.id]);

   const handleQuestionSubmit = async () => {
+    if (!query.trim() || isLoading) return; // Don't submit empty messages or while loading
+
     const currMessages = messages.concat([{ text: query, speaker: "user" }]);
     setMessages(currMessages);
+    setQuery(""); // Clear input immediately after submission
+    setIsLoading(true);
+
     if (simbaMode) {
       console.log("simba mode activated");
@@ -70,24 +152,29 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
           },
         ]),
       );
+      setIsLoading(false);
       return;
     }

     try {
-      const result = await conversationService.sendQuery(query);
+      const result = await conversationService.sendQuery(
+        query,
+        selectedConversation.id,
+      );
       setQuestionsAnswers(
         questionsAnswers.concat([{ question: query, answer: result.response }]),
       );
       setMessages(
         currMessages.concat([{ text: result.response, speaker: "simba" }]),
       );
-      setQuery(""); // Clear input after successful send
     } catch (error) {
       console.error("Failed to send query:", error);
       // If session expired, redirect to login
       if (error instanceof Error && error.message.includes("Session expired")) {
         setAuthenticated(false);
       }
+    } finally {
+      setIsLoading(false);
     }
   };

@@ -95,56 +182,129 @@ export const ChatScreen = ({ setAuthenticated }: ChatScreenProps) => {
     setQuery(event.target.value);
   };

+  const handleKeyDown = (event: React.KeyboardEvent<HTMLTextAreaElement>) => {
+    // Submit on Enter, but allow Shift+Enter for new line
+    if (event.key === "Enter" && !event.shiftKey) {
+      event.preventDefault();
+      handleQuestionSubmit();
+    }
+  };
+
   return (
-    <div className="h-screen bg-opacity-20">
-      <div className="bg-white/85 h-screen">
-        <div className="flex flex-row justify-center py-4">
-          <div className="flex flex-col gap-4 min-w-xl max-w-xl">
-            <div className="flex flex-row justify-between">
-              <header className="flex flex-row justify-center gap-2 grow sticky top-0 z-10 bg-white">
-                <h1 className="text-3xl">ask simba!</h1>
-              </header>
+    <div className="h-screen flex flex-row bg-[#F9F5EB]">
+      {/* Sidebar - Expanded */}
+      <aside
+        className={`hidden md:flex md:flex-col bg-[#F9F5EB] border-r border-gray-200 p-4 overflow-y-auto transition-all duration-300 ${sidebarCollapsed ? "w-20" : "w-64"}`}
+      >
+        {!sidebarCollapsed ? (
+          <div className="bg-[#F9F5EB]">
+            <div className="flex flex-row items-center gap-2 mb-6">
+              <img
+                src={catIcon}
+                alt="Simba"
+                className="cursor-pointer hover:opacity-80"
+                onClick={() => setSidebarCollapsed(true)}
|
/>
|
||||||
|
<h2 className="text-3xl bg-[#F9F5EB] font-semibold">asksimba!</h2>
|
||||||
|
</div>
|
||||||
|
<ConversationList
|
||||||
|
conversations={conversations}
|
||||||
|
onCreateNewConversation={handleCreateNewConversation}
|
||||||
|
onSelectConversation={handleSelectConversation}
|
||||||
|
/>
|
||||||
|
<div className="mt-auto pt-4">
|
||||||
<button
|
<button
|
||||||
className="p-4 border border-red-400 bg-red-200 hover:bg-red-400 cursor-pointer rounded-md"
|
className="w-full p-2 border border-red-400 bg-red-200 hover:bg-red-400 cursor-pointer rounded-md text-sm"
|
||||||
onClick={() => setAuthenticated(false)}
|
onClick={() => setAuthenticated(false)}
|
||||||
>
|
>
|
||||||
logout
|
logout
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="flex flex-col items-center gap-4">
|
||||||
|
<img
|
||||||
|
src={catIcon}
|
||||||
|
alt="Simba"
|
||||||
|
className="cursor-pointer hover:opacity-80"
|
||||||
|
onClick={() => setSidebarCollapsed(false)}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</aside>
|
||||||
|
|
||||||
|
{/* Main chat area */}
|
||||||
|
<div className="flex-1 flex flex-col h-screen overflow-hidden">
|
||||||
|
{/* Mobile header */}
|
||||||
|
<header className="md:hidden flex flex-row justify-between items-center gap-3 p-4 border-b border-gray-200 bg-white">
|
||||||
|
<div className="flex flex-row items-center gap-2">
|
||||||
|
<img src={catIcon} alt="Simba" className="w-10 h-10" />
|
||||||
|
<h1 className="text-xl">asksimba!</h1>
|
||||||
|
</div>
|
||||||
|
<div className="flex flex-row gap-2">
|
||||||
|
<button
|
||||||
|
className="p-2 border border-green-400 bg-green-200 hover:bg-green-400 cursor-pointer rounded-md text-sm"
|
||||||
|
onClick={() => setShowConversations(!showConversations)}
|
||||||
|
>
|
||||||
|
{showConversations ? "hide" : "show"}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
className="p-2 border border-red-400 bg-red-200 hover:bg-red-400 cursor-pointer rounded-md text-sm"
|
||||||
|
onClick={() => setAuthenticated(false)}
|
||||||
|
>
|
||||||
|
logout
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
{/* Messages area */}
|
||||||
|
{selectedConversation && (
|
||||||
|
<div className="sticky top-0 mx-auto w-full">
|
||||||
|
<div className="bg-[#F9F5EB] text-black px-6 w-full py-3">
|
||||||
|
<h2 className="text-lg font-semibold">
|
||||||
|
{selectedConversation.title || "Untitled Conversation"}
|
||||||
|
</h2>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
<div className="flex-1 overflow-y-auto relative px-4 py-6">
|
||||||
|
{/* Floating conversation name */}
|
||||||
|
|
||||||
|
<div className="max-w-2xl mx-auto flex flex-col gap-4">
|
||||||
|
{showConversations && (
|
||||||
|
<div className="md:hidden">
|
||||||
|
<ConversationList
|
||||||
|
conversations={conversations}
|
||||||
|
onCreateNewConversation={handleCreateNewConversation}
|
||||||
|
onSelectConversation={handleSelectConversation}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
{messages.map((msg, index) => {
|
{messages.map((msg, index) => {
|
||||||
if (msg.speaker === "simba") {
|
if (msg.speaker === "simba") {
|
||||||
return <AnswerBubble key={index} text={msg.text} />;
|
return <AnswerBubble key={index} text={msg.text} />;
|
||||||
}
|
}
|
||||||
return <QuestionBubble key={index} text={msg.text} />;
|
return <QuestionBubble key={index} text={msg.text} />;
|
||||||
})}
|
})}
|
||||||
<footer className="flex flex-col gap-2 sticky bottom-0">
|
{isLoading && <AnswerBubble text="" loading={true} />}
|
||||||
<div className="flex flex-row justify-between gap-2 grow">
|
<div ref={messagesEndRef} />
|
||||||
<textarea
|
|
||||||
className="p-4 border border-blue-200 rounded-md grow bg-white"
|
|
||||||
onChange={handleQueryChange}
|
|
||||||
value={query}
|
|
||||||
/>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="flex flex-row justify-between gap-2 grow">
|
|
||||||
<button
|
|
||||||
className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
|
|
||||||
onClick={() => handleQuestionSubmit()}
|
|
||||||
type="submit"
|
|
||||||
>
|
|
||||||
Submit
|
|
||||||
</button>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="flex flex-row justify-center gap-2 grow">
|
|
||||||
<input
|
{/* Input area */}
|
||||||
type="checkbox"
|
<footer className="p-4 bg-[#F9F5EB]">
|
||||||
onChange={(event) => setSimbaMode(event.target.checked)}
|
<div className="max-w-2xl mx-auto">
|
||||||
|
<MessageInput
|
||||||
|
query={query}
|
||||||
|
handleQueryChange={handleQueryChange}
|
||||||
|
handleKeyDown={handleKeyDown}
|
||||||
|
handleQuestionSubmit={handleQuestionSubmit}
|
||||||
|
setSimbaMode={setSimbaMode}
|
||||||
|
isLoading={isLoading}
|
||||||
/>
|
/>
|
||||||
<p>simba mode?</p>
|
|
||||||
</div>
|
</div>
|
||||||
</footer>
|
</footer>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
raggr-frontend/src/components/ConversationList.tsx (new file, 69 lines)
@@ -0,0 +1,69 @@
import { useState, useEffect } from "react";

import { conversationService } from "../api/conversationService";
type Conversation = {
  title: string;
  id: string;
};

type ConversationProps = {
  conversations: Conversation[];
  onSelectConversation: (conversation: Conversation) => void;
  onCreateNewConversation: () => void;
};

export const ConversationList = ({
  conversations,
  onSelectConversation,
  onCreateNewConversation,
}: ConversationProps) => {
  const [conservations, setConversations] = useState(conversations);

  useEffect(() => {
    const loadConversations = async () => {
      try {
        let fetchedConversations =
          await conversationService.getAllConversations();

        if (conversations.length == 0) {
          await conversationService.createConversation();
          fetchedConversations =
            await conversationService.getAllConversations();
        }
        setConversations(
          fetchedConversations.map((conversation) => ({
            id: conversation.id,
            title: conversation.name,
          })),
        );
      } catch (error) {
        console.error("Failed to load messages:", error);
      }
    };
    loadConversations();
  }, []);

  return (
    <div className="bg-indigo-300 rounded-md p-3 sm:p-4 flex flex-col gap-1">
      {conservations.map((conversation) => {
        return (
          <div
            key={conversation.id}
            className="border-blue-400 bg-indigo-300 hover:bg-indigo-200 cursor-pointer rounded-md p-3 min-h-[44px] flex items-center"
            onClick={() => onSelectConversation(conversation)}
          >
            <p className="text-sm sm:text-base truncate w-full">
              {conversation.title}
            </p>
          </div>
        );
      })}
      <div
        className="border-blue-400 bg-indigo-300 hover:bg-indigo-200 cursor-pointer rounded-md p-3 min-h-[44px] flex items-center"
        onClick={() => onCreateNewConversation()}
      >
        <p className="text-sm sm:text-base"> + Start a new thread</p>
      </div>
    </div>
  );
};
@@ -1,76 +1,126 @@
-import { useState } from "react";
+import { useState, useEffect } from "react";
 import { userService } from "../api/userService";
+import { oidcService } from "../api/oidcService";
 
 type LoginScreenProps = {
   setAuthenticated: (isAuth: boolean) => void;
 };
 
 export const LoginScreen = ({ setAuthenticated }: LoginScreenProps) => {
-  const [username, setUsername] = useState<string>("");
-  const [password, setPassword] = useState<string>("");
   const [error, setError] = useState<string>("");
+  const [isChecking, setIsChecking] = useState<boolean>(true);
+  const [isLoggingIn, setIsLoggingIn] = useState<boolean>(false);
 
-  const handleLogin = async () => {
-    if (!username || !password) {
-      setError("Please enter username and password");
-      return;
-    }
-
-    try {
-      const result = await userService.login(username, password);
-      localStorage.setItem("access_token", result.access_token);
-      localStorage.setItem("refresh_token", result.refresh_token);
-      setAuthenticated(true);
-      setError("");
-    } catch (err) {
-      setError("Login failed. Please check your credentials.");
-      console.error("Login error:", err);
+  useEffect(() => {
+    const initAuth = async () => {
+      // First, check for OIDC callback parameters
+      const callbackParams = oidcService.getCallbackParamsFromURL();
+
+      if (callbackParams) {
+        // Handle OIDC callback
+        try {
+          setIsLoggingIn(true);
+          const result = await oidcService.handleCallback(
+            callbackParams.code,
+            callbackParams.state
+          );
+
+          // Store tokens
+          localStorage.setItem("access_token", result.access_token);
+          localStorage.setItem("refresh_token", result.refresh_token);
+
+          // Clear URL parameters
+          oidcService.clearCallbackParams();
+
+          setAuthenticated(true);
+          setIsChecking(false);
+          return;
+        } catch (err) {
+          console.error("OIDC callback error:", err);
+          setError("Login failed. Please try again.");
+          oidcService.clearCallbackParams();
+          setIsLoggingIn(false);
+          setIsChecking(false);
+          return;
+        }
+      }
+
+      // Check if user is already authenticated
+      const isValid = await userService.validateToken();
+      if (isValid) {
+        setAuthenticated(true);
+      }
+      setIsChecking(false);
+    };
+
+    initAuth();
+  }, [setAuthenticated]);
+
+  const handleOIDCLogin = async () => {
+    try {
+      setIsLoggingIn(true);
+      setError("");
+
+      // Get authorization URL from backend
+      const authUrl = await oidcService.initiateLogin();
+
+      // Redirect to Authelia
+      window.location.href = authUrl;
+    } catch (err) {
+      setError("Failed to initiate login. Please try again.");
+      console.error("OIDC login error:", err);
+      setIsLoggingIn(false);
     }
   };
 
+  // Show loading state while checking authentication or processing callback
+  if (isChecking || isLoggingIn) {
+    return (
+      <div className="h-screen bg-opacity-20">
+        <div className="bg-white/85 h-screen flex items-center justify-center">
+          <div className="text-center">
+            <p className="text-lg sm:text-xl">
+              {isLoggingIn ? "Logging in..." : "Checking authentication..."}
+            </p>
+          </div>
+        </div>
+      </div>
+    );
+  }
+
   return (
     <div className="h-screen bg-opacity-20">
       <div className="bg-white/85 h-screen">
         <div className="flex flex-row justify-center py-4">
-          <div className="flex flex-col gap-4 min-w-xl max-w-xl">
-            <div className="flex flex-col gap-1">
-              <div className="flex flex-grow justify-center w-full bg-amber-400">
-                <h1 className="text-xl font-bold">
+          <div className="flex flex-col gap-4 w-full px-4 sm:w-11/12 sm:max-w-2xl lg:max-w-4xl sm:px-0">
+            <div className="flex flex-col gap-4">
+              <div className="flex flex-grow justify-center w-full bg-amber-400 p-2">
+                <h1 className="text-base sm:text-xl font-bold text-center">
                   I AM LOOKING FOR A DESIGNER. THIS APP WILL REMAIN UGLY UNTIL A
                   DESIGNER COMES.
                 </h1>
               </div>
               <header className="flex flex-row justify-center gap-2 grow sticky top-0 z-10 bg-white">
-                <h1 className="text-3xl">ask simba!</h1>
+                <h1 className="text-2xl sm:text-3xl">ask simba!</h1>
               </header>
-              <label htmlFor="username">username</label>
-              <input
-                type="text"
-                id="username"
-                name="username"
-                value={username}
-                onChange={(e) => setUsername(e.target.value)}
-                className="border border-s-slate-950 p-3 rounded-md"
-              />
-              <label htmlFor="password">password</label>
-              <input
-                type="password"
-                id="password"
-                name="password"
-                value={password}
-                onChange={(e) => setPassword(e.target.value)}
-                className="border border-s-slate-950 p-3 rounded-md"
-              />
               {error && (
-                <div className="text-red-600 font-semibold">{error}</div>
+                <div className="text-red-600 font-semibold text-sm sm:text-base bg-red-50 p-3 rounded-md">
+                  {error}
+                </div>
               )}
+
+              <div className="text-center text-sm sm:text-base text-gray-600 py-2">
+                Click below to login with Authelia
+              </div>
             </div>
 
             <button
-              className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
-              onClick={handleLogin}
+              className="p-3 sm:p-4 min-h-[44px] border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow text-sm sm:text-base font-semibold"
+              onClick={handleOIDCLogin}
+              disabled={isLoggingIn}
             >
-              login
+              {isLoggingIn ? "Redirecting..." : "Login with Authelia"}
             </button>
           </div>
         </div>
raggr-frontend/src/components/MessageInput.tsx (new file, 56 lines)
@@ -0,0 +1,56 @@
import { useEffect, useState, useRef } from "react";

type MessageInputProps = {
  handleQueryChange: (event: React.ChangeEvent<HTMLTextAreaElement>) => void;
  handleKeyDown: (event: React.ChangeEvent<HTMLTextAreaElement>) => void;
  handleQuestionSubmit: () => void;
  setSimbaMode: (sdf: boolean) => void;
  query: string;
  isLoading: boolean;
};

export const MessageInput = ({
  query,
  handleKeyDown,
  handleQueryChange,
  handleQuestionSubmit,
  setSimbaMode,
  isLoading,
}: MessageInputProps) => {
  return (
    <div className="flex flex-col gap-4 sticky bottom-0 bg-[#3D763A] p-6 rounded-xl">
      <div className="flex flex-row justify-between grow">
        <textarea
          className="p-3 sm:p-4 border border-blue-200 rounded-md grow bg-[#F9F5EB] min-h-[44px] resize-y"
          onChange={handleQueryChange}
          onKeyDown={handleKeyDown}
          value={query}
          rows={2}
          placeholder="Type your message... (Press Enter to send, Shift+Enter for new line)"
        />
      </div>
      <div className="flex flex-row justify-between gap-2 grow">
        <button
          className={`p-3 sm:p-4 min-h-[44px] border border-blue-400 rounded-md flex-grow text-sm sm:text-base ${
            isLoading
              ? "bg-gray-400 cursor-not-allowed opacity-50"
              : "bg-[#EDA541] hover:bg-blue-400 cursor-pointer"
          }`}
          onClick={() => handleQuestionSubmit()}
          type="submit"
          disabled={isLoading}
        >
          {isLoading ? "Sending..." : "Submit"}
        </button>
      </div>
      <div className="flex flex-row justify-center gap-2 grow items-center">
        <input
          type="checkbox"
          onChange={(event) => setSimbaMode(event.target.checked)}
          className="w-5 h-5 cursor-pointer"
        />
        <p className="text-sm sm:text-base">simba mode?</p>
      </div>
    </div>
  );
};
@@ -3,5 +3,9 @@ type QuestionBubbleProps = {
 };
 
 export const QuestionBubble = ({ text }: QuestionBubbleProps) => {
-  return <div className="rounded-md bg-stone-200 p-3">🤦: {text}</div>;
+  return (
+    <div className="w-2/3 rounded-md bg-stone-200 p-3 sm:p-4 break-words overflow-wrap-anywhere text-sm sm:text-base ml-auto">
+      🤦: {text}
+    </div>
+  );
 };
(File diff suppressed because it is too large)

scripts/__init__.py (new file, empty)
@@ -1,16 +1,27 @@
 # GENERATED BY CLAUDE
 
+import os
 import sys
 import uuid
 import asyncio
 from tortoise import Tortoise
 from blueprints.users.models import User
 
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Database configuration with environment variable support
+DATABASE_PATH = os.getenv("DATABASE_PATH", "database/raggr.db")
+DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite://{DATABASE_PATH}")
+
+print(DATABASE_URL)
+
+
 async def add_user(username: str, email: str, password: str):
     """Add a new user to the database"""
     await Tortoise.init(
-        db_url="sqlite://raggr.db",
+        db_url=DATABASE_URL,
         modules={
             "models": [
                 "blueprints.users.models",
@@ -56,7 +67,7 @@ async def add_user(username: str, email: str, password: str):
 async def list_users():
     """List all users in the database"""
     await Tortoise.init(
-        db_url="sqlite://raggr.db",
+        db_url=DATABASE_URL,
         modules={
             "models": [
                 "blueprints.users.models",
@@ -94,6 +105,11 @@ def print_usage():
     print("\nExamples:")
     print("  python add_user.py add ryan ryan@example.com mypassword123")
     print("  python add_user.py list")
+    print("\nEnvironment Variables:")
+    print("  DATABASE_PATH - Path to database file (default: database/raggr.db)")
+    print("  DATABASE_URL - Full database URL (overrides DATABASE_PATH)")
+    print("\n  Example with custom database:")
+    print("  DATABASE_PATH=dev.db python add_user.py list")
 
 
 async def main():
@@ -1,18 +1,21 @@
-import httpx
-import os
-from pathlib import Path
 import logging
-import tempfile
+import os
 
-from image_process import describe_simba_image
-from request import PaperlessNGXService
 import sqlite3
 
+import httpx
+from dotenv import load_dotenv
+import sys
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from utils.image_process import describe_simba_image
+from utils.request import PaperlessNGXService
+
 logging.basicConfig(level=logging.INFO)
 
-from dotenv import load_dotenv
-
 load_dotenv()
 
 # Configuration from environment variables
@@ -27,7 +30,7 @@ headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}
 VISITED = {}
 
 if __name__ == "__main__":
-    conn = sqlite3.connect("./visited.db")
+    conn = sqlite3.connect("./database/visited.db")
     c = conn.cursor()
     c.execute("select immich_id from visited")
     rows = c.fetchall()
@@ -89,7 +92,7 @@ if __name__ == "__main__":
     image_date = description.image_date
 
     description_filepath = os.path.join(
-        "/Users/ryanchen/Programs/raggr", f"SIMBA_DESCRIBE_001.txt"
+        "/Users/ryanchen/Programs/raggr", "SIMBA_DESCRIBE_001.txt"
     )
     file = open(description_filepath, "w+")
     file.write(image_description)
scripts/inspect_vector_store.py (new file, 92 lines)
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""CLI tool to inspect the vector store contents."""

import argparse
import os

from dotenv import load_dotenv

from blueprints.rag.logic import (
    get_vector_store_stats,
    index_documents,
    list_all_documents,
)

# Load .env from the root directory
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
env_path = os.path.join(root_dir, ".env")
load_dotenv(env_path)


def print_stats():
    """Print vector store statistics."""
    stats = get_vector_store_stats()
    print("=== Vector Store Statistics ===")
    print(f"Collection Name: {stats['collection_name']}")
    print(f"Total Documents: {stats['total_documents']}")
    print()


def print_documents(limit: int = 10, show_content: bool = False):
    """Print documents in the vector store."""
    docs = list_all_documents(limit=limit)
    print(f"=== Documents (showing {len(docs)} of {limit} requested) ===\n")

    for i, doc in enumerate(docs, 1):
        print(f"Document {i}:")
        print(f"  ID: {doc['id']}")
        print(f"  Metadata: {doc['metadata']}")
        if show_content:
            print(f"  Content Preview: {doc['content_preview']}")
        print()


async def run_index():
    """Run the indexing process."""
    print("Starting indexing process...")
    await index_documents()
    print("Indexing complete!")
    print_stats()


def main():
    import asyncio

    parser = argparse.ArgumentParser(description="Inspect the vector store contents")
    parser.add_argument(
        "--stats", action="store_true", help="Show vector store statistics"
    )
    parser.add_argument(
        "--list", type=int, metavar="N", help="List N documents from the vector store"
    )
    parser.add_argument(
        "--show-content",
        action="store_true",
        help="Show content preview when listing documents",
    )
    parser.add_argument(
        "--index",
        action="store_true",
        help="Index documents from Paperless-NGX into the vector store",
    )

    args = parser.parse_args()

    # Handle indexing first if requested
    if args.index:
        asyncio.run(run_index())
        return

    # If no arguments provided, show stats by default
    if not any([args.stats, args.list]):
        args.stats = True

    if args.stats:
        print_stats()

    if args.list:
        print_documents(limit=args.list, show_content=args.show_content)


if __name__ == "__main__":
    main()
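A quick smoke test of the new inspection CLI (a sketch, assuming it is run from the project root with the blueprints package importable and a populated .env):

    # show collection stats (also the default when no flags are given)
    python scripts/inspect_vector_store.py --stats
    # list five documents with a content preview
    python scripts/inspect_vector_store.py --list 5 --show-content
    # index from Paperless-NGX, then print stats
    python scripts/inspect_vector_store.py --index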
scripts/manage_vectorstore.py (new file, 121 lines)
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""Management script for vector store operations."""

import argparse
import asyncio
import sys

from blueprints.rag.logic import (
    get_vector_store_stats,
    index_documents,
    list_all_documents,
    vector_store,
)


def stats():
    """Show vector store statistics."""
    stats = get_vector_store_stats()
    print("=== Vector Store Statistics ===")
    print(f"Collection: {stats['collection_name']}")
    print(f"Total Documents: {stats['total_documents']}")


async def index():
    """Index documents from Paperless-NGX."""
    print("Starting indexing process...")
    print("Fetching documents from Paperless-NGX...")
    await index_documents()
    print("✓ Indexing complete!")
    stats()


async def reindex():
    """Clear and reindex all documents."""
    print("Clearing existing documents...")
    collection = vector_store._collection
    all_docs = collection.get()

    if all_docs["ids"]:
        print(f"Deleting {len(all_docs['ids'])} existing documents...")
        collection.delete(ids=all_docs["ids"])
        print("✓ Cleared")
    else:
        print("Collection is already empty")

    await index()


def list_docs(limit: int = 10, show_content: bool = False):
    """List documents in the vector store."""
    docs = list_all_documents(limit=limit)
    print(f"\n=== Documents (showing {len(docs)}) ===\n")

    for i, doc in enumerate(docs, 1):
        print(f"Document {i}:")
        print(f"  ID: {doc['id']}")
        print(f"  Metadata: {doc['metadata']}")
        if show_content:
            print(f"  Content: {doc['content_preview']}")
        print()


def main():
    parser = argparse.ArgumentParser(
        description="Manage vector store for RAG system",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s stats                    # Show vector store statistics
  %(prog)s index                    # Index new documents from Paperless-NGX
  %(prog)s reindex                  # Clear and reindex all documents
  %(prog)s list 10                  # List first 10 documents
  %(prog)s list 20 --show-content   # List 20 documents with content preview
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Stats command
    subparsers.add_parser("stats", help="Show vector store statistics")

    # Index command
    subparsers.add_parser("index", help="Index documents from Paperless-NGX")

    # Reindex command
    subparsers.add_parser("reindex", help="Clear and reindex all documents")

    # List command
    list_parser = subparsers.add_parser("list", help="List documents in vector store")
    list_parser.add_argument(
        "limit", type=int, default=10, nargs="?", help="Number of documents to list"
    )
    list_parser.add_argument(
        "--show-content", action="store_true", help="Show content preview"
    )

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    try:
        if args.command == "stats":
            stats()
        elif args.command == "index":
            asyncio.run(index())
        elif args.command == "reindex":
            asyncio.run(reindex())
        elif args.command == "list":
            list_docs(limit=args.limit, show_content=args.show_content)
    except KeyboardInterrupt:
        print("\n\nOperation cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -49,11 +49,20 @@ DOCTYPE_OPTIONS = [
     "Letter",
 ]
 
+QUERY_TYPE_OPTIONS = [
+    "Simba",
+    "Other",
+]
+
+
 class DocumentType(BaseModel):
     type: list[str] = Field(description="type of document", enum=DOCTYPE_OPTIONS)
 
 
+class QueryType(BaseModel):
+    type: str = Field(desciption="type of query", enum=QUERY_TYPE_OPTIONS)
+
+
 PROMPT = """
 You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
 a cat, Simba, and its records. The types of records are listed below. Using the query, extract the
@@ -111,6 +120,27 @@ Query: "Who does Simba know?"
 Tags: ["Letter", "Documentation"]
 """
 
+QUERY_TYPE_PROMPT = f"""You are an information specialist that processes user queries.
+A query can have one tag attached from the following options. Based on the query and the transcript which is listed below, determine
+which of the following options is most appropriate: {",".join(QUERY_TYPE_OPTIONS)}
+
+### Example 1
+Query: "Who is Simba's current vet?"
+Tags: ["Simba"]
+
+
+### Example 2
+Query: "What is the capital of Tokyo?"
+Tags: ["Other"]
+
+
+### Example 3
+Query: "Can you help me write an email?"
+Tags: ["Other"]
+
+TRANSCRIPT:
+"""
+
+
 class QueryGenerator:
     def __init__(self) -> None:
@@ -154,6 +184,33 @@ class QueryGenerator:
         metadata_query = {"document_type": {"$in": type_data["type"]}}
         return metadata_query
 
+    def get_query_type(self, input: str, transcript: str):
+        client = OpenAI()
+        response = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are an information specialist that is really good at deciding what tags a query should have",
+                },
+                {
+                    "role": "user",
+                    "content": f"{QUERY_TYPE_PROMPT}\nTRANSCRIPT:\n{transcript}\nQUERY:{input}",
+                },
+            ],
+            model="gpt-4o",
+            response_format={
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "query_type",
+                    "schema": QueryType.model_json_schema(),
+                },
+            },
+        )
+
+        response_json_str = response.choices[0].message.content
+        type_data = json.loads(response_json_str)
+        return type_data["type"]
+
     def get_query(self, input: str):
         client = OpenAI()
         response = client.responses.parse(
scripts/test_query.py (new file, 39 lines)
@@ -0,0 +1,39 @@
#!/usr/bin/env python3
"""Test the query_vector_store function."""

import asyncio
import os

from dotenv import load_dotenv

from blueprints.rag.logic import query_vector_store

# Load .env from the root directory
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
env_path = os.path.join(root_dir, ".env")
load_dotenv(env_path)


async def test_query(query: str):
    """Test a query against the vector store."""
    print(f"Query: {query}\n")
    result, docs = await query_vector_store(query)
    print(f"Found {len(docs)} documents\n")
    print("Serialized result:")
    print(result)
    print("\n" + "=" * 80 + "\n")


async def main():
    queries = [
        "What is Simba's weight?",
        "What medications is Simba taking?",
        "Tell me about Simba's recent vet visits",
    ]

    for query in queries:
        await test_query(query)


if __name__ == "__main__":
    asyncio.run(main())
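The test script takes no arguments; it simply runs its three canned queries in order. A minimal invocation, assuming the project root is on PYTHONPATH so blueprints.rag.logic resolves:

    python scripts/test_query.py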
scripts/user_message_stats.py (new file, 79 lines)
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""
Script to show how many messages each user has written
"""

import asyncio
from tortoise import Tortoise
from blueprints.users.models import User
from blueprints.conversation.models import Speaker
import os


async def get_user_message_stats():
    """Get message count statistics per user"""

    # Initialize database connection
    database_url = os.getenv("DATABASE_URL", "sqlite://raggr.db")
    await Tortoise.init(
        db_url=database_url,
        modules={
            "models": [
                "blueprints.users.models",
                "blueprints.conversation.models",
            ]
        },
    )

    print("\n📊 User Message Statistics\n")
    print(
        f"{'Username':<20} {'Total Messages':<15} {'User Messages':<15} {'Conversations':<15}"
    )
    print("=" * 70)

    # Get all users
    users = await User.all()

    total_users = 0
    total_messages = 0

    for user in users:
        # Get all conversations for this user
        conversations = await user.conversations.all()

        if not conversations:
            continue

        total_users += 1

        # Count messages across all conversations
        user_message_count = 0
        total_message_count = 0

        for conversation in conversations:
            messages = await conversation.messages.all()
            total_message_count += len(messages)

            # Count only user messages (not assistant responses)
            user_messages = [msg for msg in messages if msg.speaker == Speaker.USER]
            user_message_count += len(user_messages)

        total_messages += user_message_count

        print(
            f"{user.username:<20} {total_message_count:<15} {user_message_count:<15} {len(conversations):<15}"
        )

    print("=" * 70)
    print("\n📈 Summary:")
    print(f"  Total active users: {total_users}")
    print(f"  Total user messages: {total_messages}")
    print(
        f"  Average messages per user: {total_messages / total_users if total_users > 0 else 0:.1f}\n"
    )

    await Tortoise.close_connections()


if __name__ == "__main__":
    asyncio.run(get_user_message_stats())
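The stats script reads DATABASE_URL with a sqlite://raggr.db fallback, so it can be pointed at another database without editing the file. A sketch (the alternate URL below is an assumed example, not taken from this diff):

    # default database
    python scripts/user_message_stats.py
    # same script against a locally exposed PostgreSQL instance
    DATABASE_URL=postgres://raggr:changeme@localhost:5432/raggr python scripts/user_message_stats.py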
startup-dev.sh (new executable file, 25 lines)
@@ -0,0 +1,25 @@
#!/bin/bash
set -e

echo "Initializing directories..."
mkdir -p /app/data/chromadb

echo "Rebuilding frontend..."
cd /app/raggr-frontend
yarn build
cd /app

echo "Setting up database..."
# Give PostgreSQL a moment to be ready (healthcheck in docker-compose handles this)
sleep 3

if ls migrations/models/0_*.py 1> /dev/null 2>&1; then
    echo "Running database migrations..."
    aerich upgrade
else
    echo "No migrations found, initializing database..."
    aerich init-db
fi

echo "Starting Flask application in debug mode..."
python app.py
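startup-dev.sh assumes it is running inside the app container (note the absolute /app paths), so it is normally reached through docker-compose rather than invoked on the host. A hedged sketch; the service name "app" is an assumption, not taken from this diff:

    # bring the stack up; the app container's entrypoint runs startup-dev.sh
    docker compose up --build
    # or re-run the script by hand inside the running container
    docker compose exec app bash /app/startup-dev.sh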
utils/__init__.py (new file, empty)
@@ -14,7 +14,7 @@ from llm import LLMClient
 load_dotenv()
 
 ollama_client = Client(
-    host=os.getenv("OLLAMA_HOST", "http://localhost:11434"), timeout=10.0
+    host=os.getenv("OLLAMA_HOST", "http://localhost:11434"), timeout=1.0
 )
@@ -8,7 +8,7 @@ import ollama
 from PIL import Image
 import fitz
 
-from request import PaperlessNGXService
+from .request import PaperlessNGXService
 
 load_dotenv()