diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..c300885 Binary files /dev/null and b/.DS_Store differ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9761200 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.2 + hooks: + - id: ruff # Linter + - id: ruff-format # Formatter diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..a2e51c1 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,109 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +SimbaRAG is a RAG (Retrieval-Augmented Generation) conversational AI system for querying information about Simba (a cat). It ingests documents from Paperless-NGX, stores embeddings in ChromaDB, and uses LLMs (Ollama or OpenAI) to answer questions. + +## Commands + +### Development + +```bash +# Start dev environment with hot reload +docker compose -f docker-compose.dev.yml up --build + +# View logs +docker compose -f docker-compose.dev.yml logs -f raggr +``` + +### Database Migrations (Aerich/Tortoise ORM) + +```bash +# Generate migration (must run in Docker with DB access) +docker compose -f docker-compose.dev.yml exec raggr aerich migrate --name describe_change + +# Apply migrations (auto-runs on startup, manual if needed) +docker compose -f docker-compose.dev.yml exec raggr aerich upgrade + +# View migration history +docker compose exec raggr aerich history +``` + +### Frontend + +```bash +cd raggr-frontend +yarn install +yarn build # Production build +yarn dev # Dev server (rarely needed, backend serves frontend) +``` + +### Production + +```bash +docker compose build raggr +docker compose up -d +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Docker Compose │ 
+├─────────────────────────────────────────────────────────────┤ +│ raggr (port 8080) │ postgres (port 5432) │ +│ ├── Quart backend │ PostgreSQL 16 │ +│ ├── React frontend (served) │ │ +│ └── ChromaDB (volume) │ │ +└─────────────────────────────────────────────────────────────┘ +``` + +**Backend** (root directory): +- `app.py` - Quart application entry, serves API and static frontend +- `main.py` - RAG logic, document indexing, LLM interaction, LangChain agent +- `llm.py` - LLM client with Ollama primary, OpenAI fallback +- `aerich_config.py` - Database migration configuration +- `blueprints/` - API routes organized as Quart blueprints + - `users/` - OIDC auth, JWT tokens, RBAC with LDAP groups + - `conversation/` - Chat conversations and message history + - `rag/` - Document indexing endpoints (admin-only) +- `config/` - Configuration modules + - `oidc_config.py` - OIDC authentication configuration +- `utils/` - Reusable utilities + - `chunker.py` - Document chunking for embeddings + - `cleaner.py` - PDF cleaning and summarization + - `image_process.py` - Image description with LLM + - `request.py` - Paperless-NGX API client +- `scripts/` - Administrative and utility scripts + - `add_user.py` - Create users manually + - `user_message_stats.py` - User message statistics + - `manage_vectorstore.py` - Vector store management CLI + - `inspect_vector_store.py` - Inspect ChromaDB contents + - `query.py` - Query generation utilities +- `migrations/` - Database migration files + +**Frontend** (`raggr-frontend/`): +- React 19 with Rsbuild bundler +- Tailwind CSS for styling +- Built to `dist/`, served by backend at `/` + +**Auth Flow**: LLDAP → Authelia (OIDC) → Backend JWT → Frontend localStorage + +## Key Patterns + +- All endpoints are async (`async def`) +- Use `@jwt_refresh_token_required` for authenticated endpoints +- Use `@admin_required` for admin-only endpoints (checks `lldap_admin` group) +- Tortoise ORM models in `blueprints/*/models.py` +- Frontend API 
services in `raggr-frontend/src/api/` + +## Environment Variables + +See `.env.example`. Key ones: +- `DATABASE_URL` - PostgreSQL connection +- `OIDC_*` - Authelia OIDC configuration +- `OLLAMA_URL` - Local LLM server +- `OPENAI_API_KEY` - Fallback LLM +- `PAPERLESS_TOKEN` / `BASE_URL` - Document source diff --git a/DEV-README.md b/DEV-README.md deleted file mode 100644 index ecbcb2a..0000000 --- a/DEV-README.md +++ /dev/null @@ -1,110 +0,0 @@ -# Development Environment Setup - -This guide explains how to run the application in development mode with hot reload enabled. - -## Quick Start - -### Development Mode (Hot Reload) - -```bash -# Start all services in development mode -docker-compose -f docker-compose.dev.yml up --build - -# Or run in detached mode -docker-compose -f docker-compose.dev.yml up -d --build -``` - -### Production Mode - -```bash -# Start production services -docker-compose up --build -``` - -## What's Different in Dev Mode? - -### Backend (Quart/Flask) -- **Hot Reload**: Python code changes are automatically detected and the server restarts -- **Source Mounted**: Your local `services/raggr` directory is mounted as a volume -- **Debug Mode**: Flask runs with `debug=True` for better error messages -- **Environment**: `FLASK_ENV=development` and `PYTHONUNBUFFERED=1` for immediate log output - -### Frontend (React + rsbuild) -- **Auto Rebuild**: Frontend automatically rebuilds when files change -- **Watch Mode**: rsbuild runs in watch mode, rebuilding to `dist/` on save -- **Source Mounted**: Your local `services/raggr/raggr-frontend` directory is mounted as a volume -- **Served by Backend**: Built files are served by the backend, no separate dev server - -## Ports - -- **Application**: 8080 (accessible at `http://localhost:8080` or `http://YOUR_IP:8080`) - -The backend serves both the API and the auto-rebuilt frontend, making it accessible from other machines on your network. 
- -## Useful Commands - -```bash -# View logs -docker-compose -f docker-compose.dev.yml logs -f - -# View logs for specific service -docker-compose -f docker-compose.dev.yml logs -f raggr-backend -docker-compose -f docker-compose.dev.yml logs -f raggr-frontend - -# Rebuild after dependency changes -docker-compose -f docker-compose.dev.yml up --build - -# Stop all services -docker-compose -f docker-compose.dev.yml down - -# Stop and remove volumes (fresh start) -docker-compose -f docker-compose.dev.yml down -v -``` - -## Making Changes - -### Backend Changes -1. Edit any Python file in `services/raggr/` -2. Save the file -3. The Quart server will automatically restart -4. Check logs to confirm reload - -### Frontend Changes -1. Edit any file in `services/raggr/raggr-frontend/src/` -2. Save the file -3. The browser will automatically refresh (Hot Module Replacement) -4. No need to rebuild - -### Dependency Changes - -**Backend** (pyproject.toml): -```bash -# Rebuild the backend service -docker-compose -f docker-compose.dev.yml up --build raggr-backend -``` - -**Frontend** (package.json): -```bash -# Rebuild the frontend service -docker-compose -f docker-compose.dev.yml up --build raggr-frontend -``` - -## Troubleshooting - -### Port Already in Use -If you see port binding errors, make sure no other services are running on ports 8080 or 3000. - -### Changes Not Reflected -1. Check if the file is properly mounted (check docker-compose.dev.yml volumes) -2. Verify the file isn't in an excluded directory (node_modules, __pycache__) -3. Check container logs for errors - -### Frontend Not Connecting to Backend -Make sure your frontend API calls point to the correct backend URL. If accessing from the same machine, use `http://localhost:8080`. If accessing from another device on the network, use `http://YOUR_IP:8080`. 
- -## Notes - -- Both services bind to `0.0.0.0` and expose ports, making them accessible on your network -- Node modules and Python cache are excluded from volume mounts to use container versions -- Database and ChromaDB data persist in Docker volumes across restarts -- Access the app from any device on your network using your host machine's IP address diff --git a/services/raggr/Dockerfile b/Dockerfile similarity index 100% rename from services/raggr/Dockerfile rename to Dockerfile diff --git a/services/raggr/Dockerfile.dev b/Dockerfile.dev similarity index 100% rename from services/raggr/Dockerfile.dev rename to Dockerfile.dev diff --git a/README.md b/README.md index f801690..df8b779 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,370 @@ -# simbarag +# SimbaRAG 🐱 -**Goal:** Learn how retrieval-augmented generation works and also create a neat little tool to ask about Simba's health. +A Retrieval-Augmented Generation (RAG) conversational AI system for querying information about Simba the cat. Built with LangChain, ChromaDB, and modern web technologies. -**Current objectives:** +## Features -- [ ] Successfully use RAG to ask a question about existing information (e.g. how many teeth has Simba had extracted) +- 🤖 **Intelligent Conversations** - LangChain-powered agent with tool use and memory +- 📚 **Document Retrieval** - RAG system using ChromaDB vector store +- 🔍 **Web Search** - Integrated Tavily API for real-time web searches +- 🔐 **OIDC Authentication** - Secure auth via Authelia with LDAP group support +- 💬 **Multi-Conversation** - Manage multiple conversation threads per user +- 🎨 **Modern UI** - React 19 frontend with Tailwind CSS +- 🐳 **Docker Ready** - Containerized deployment with Docker Compose + +## System Architecture + +```mermaid +graph TB + subgraph "Client Layer" + Browser[Web Browser] + end + + subgraph "Frontend - React" + UI[React UI<br/>Tailwind CSS] + Auth[Auth Service] + API[API Client] + end + + subgraph "Backend - Quart/Python" + App[Quart App<br/>app.py] + + subgraph "Blueprints" + Users[Users Blueprint<br/>OIDC + JWT] + Conv[Conversation Blueprint<br/>Chat Management] + RAG[RAG Blueprint<br/>Document Indexing] + end + + Agent[LangChain Agent<br/>main.py] + LLM[LLM Client<br/>llm.py] + end + + subgraph "Tools & Utilities" + Search[Simba Search Tool] + Web[Web Search Tool<br/>Tavily] + end + + subgraph "Data Layer" + Postgres[(PostgreSQL<br/>Users & Conversations)] + Chroma[(ChromaDB<br/>Vector Store)] + end + + subgraph "External Services" + Authelia[Authelia<br/>OIDC Provider] + LLDAP[LLDAP<br/>User Directory] + Ollama[Ollama<br/>Local LLM] + OpenAI[OpenAI<br/>Fallback LLM] + Paperless[Paperless-NGX<br/>Documents] + TavilyAPI[Tavily API<br/>Web Search] + end + + Browser --> UI + UI --> Auth + UI --> API + API --> App + + App --> Users + App --> Conv + App --> RAG + + Conv --> Agent + Agent --> Search + Agent --> Web + Agent --> LLM + + Search --> Chroma + Web --> TavilyAPI + RAG --> Chroma + RAG --> Paperless + + Users --> Postgres + Conv --> Postgres + + Users --> Authelia + Authelia --> LLDAP + + LLM --> Ollama + LLM -.Fallback.-> OpenAI + + style Browser fill:#e1f5ff + style UI fill:#fff3cd + style App fill:#d4edda + style Agent fill:#d4edda + style Postgres fill:#f8d7da + style Chroma fill:#f8d7da + style Ollama fill:#e2e3e5 + style OpenAI fill:#e2e3e5 +``` + +## Quick Start + +### Prerequisites + +- Docker & Docker Compose +- PostgreSQL (or use Docker) +- Ollama (optional, for local LLM) +- Paperless-NGX instance (for document source) + +### Installation + +1. **Clone the repository** + +```bash +git clone https://github.com/yourusername/simbarag.git +cd simbarag +``` + +2. **Configure environment variables** + +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +3. **Start the services** + +```bash +# Development (local PostgreSQL only) +docker compose -f docker-compose.dev.yml up -d + +# Or full Docker deployment +docker compose up -d +``` + +4. **Access the application** + +Open `http://localhost:8080` in your browser. + +## Development + +### Local Development Setup + +```bash +# 1. Start PostgreSQL +docker compose -f docker-compose.dev.yml up -d + +# 2. Set environment variables +export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr" +export CHROMADB_PATH="./chromadb" +export $(grep -v '^#' .env | xargs) + +# 3. Install dependencies +pip install -r requirements.txt +cd raggr-frontend && yarn install && yarn build && cd .. + +# 4. Run migrations +aerich upgrade + +# 5. Start the server +python app.py +``` + +See [docs/development.md](docs/development.md) for detailed development guide. 
+ +## Project Structure + +``` +simbarag/ +├── app.py # Quart application entry point +├── main.py # RAG logic & LangChain agent +├── llm.py # LLM client with Ollama/OpenAI +├── aerich_config.py # Database migration configuration +│ +├── blueprints/ # API route blueprints +│ ├── users/ # Authentication & authorization +│ ├── conversation/ # Chat conversations +│ └── rag/ # Document indexing +│ +├── config/ # Configuration modules +│ └── oidc_config.py # OIDC authentication settings +│ +├── utils/ # Reusable utilities +│ ├── chunker.py # Document chunking for embeddings +│ ├── cleaner.py # PDF cleaning and summarization +│ ├── image_process.py # Image description with LLM +│ └── request.py # Paperless-NGX API client +│ +├── scripts/ # Administrative scripts +│ ├── add_user.py +│ ├── user_message_stats.py +│ ├── manage_vectorstore.py +│ └── inspect_vector_store.py +│ +├── raggr-frontend/ # React frontend +│ └── src/ +│ +├── migrations/ # Database migrations +│ +├── docs/ # Documentation +│ ├── index.md # Documentation hub +│ ├── development.md # Development guide +│ ├── deployment.md # Deployment & migrations +│ ├── VECTORSTORE.md # Vector store management +│ ├── MIGRATIONS.md # Migration reference +│ └── authentication.md # Authentication setup +│ +├── docker-compose.yml # Production compose +├── docker-compose.dev.yml # Development compose +├── Dockerfile # Production Dockerfile +├── Dockerfile.dev # Development Dockerfile +├── CLAUDE.md # AI assistant instructions +└── README.md # This file +``` + +## Key Technologies + +### Backend +- **Quart** - Async Python web framework +- **LangChain** - Agent framework with tool use +- **Tortoise ORM** - Async ORM for PostgreSQL +- **Aerich** - Database migration tool +- **ChromaDB** - Vector database for embeddings +- **OpenAI** - Embeddings & LLM (fallback) +- **Ollama** - Local LLM (primary) + +### Frontend +- **React 19** - UI framework +- **Rsbuild** - Fast bundler +- **Tailwind CSS** - Utility-first styling +- 
**Axios** - HTTP client + +### Authentication +- **Authelia** - OIDC provider +- **LLDAP** - Lightweight LDAP server +- **JWT** - Token-based auth + +## API Endpoints + +### Authentication +- `GET /api/user/oidc/login` - Initiate OIDC login +- `GET /api/user/oidc/callback` - OIDC callback handler +- `POST /api/user/refresh` - Refresh JWT token + +### Conversations +- `POST /api/conversation/` - Create conversation +- `GET /api/conversation/` - List conversations +- `GET /api/conversation/<id>` - Get conversation with messages +- `POST /api/conversation/query` - Send message and get response + +### RAG (Admin Only) +- `GET /api/rag/stats` - Vector store statistics +- `POST /api/rag/index` - Index new documents +- `POST /api/rag/reindex` - Clear and reindex all + +## Configuration + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection string | `postgres://...` | +| `CHROMADB_PATH` | ChromaDB storage path | `./chromadb` | +| `OLLAMA_URL` | Ollama server URL | `http://localhost:11434` | +| `OPENAI_API_KEY` | OpenAI API key | - | +| `PAPERLESS_TOKEN` | Paperless-NGX API token | - | +| `BASE_URL` | Paperless-NGX base URL | - | +| `OIDC_ISSUER` | OIDC provider URL | - | +| `OIDC_CLIENT_ID` | OIDC client ID | - | +| `OIDC_CLIENT_SECRET` | OIDC client secret | - | +| `JWT_SECRET_KEY` | JWT signing key | - | +| `TAVILY_KEY` | Tavily web search API key | - | + +See `.env.example` for full list. 
+ +## Scripts + +### User Management +```bash +# Add a new user +python scripts/add_user.py + +# View message statistics +python scripts/user_message_stats.py +``` + +### Vector Store Management +```bash +# Show vector store statistics +python scripts/manage_vectorstore.py stats + +# Index new documents from Paperless +python scripts/manage_vectorstore.py index + +# Clear and reindex everything +python scripts/manage_vectorstore.py reindex + +# Inspect vector store contents +python scripts/inspect_vector_store.py +``` + +See [docs/VECTORSTORE.md](docs/VECTORSTORE.md) for details. + +## Database Migrations + +```bash +# Generate a new migration +aerich migrate --name "describe_your_changes" + +# Apply pending migrations +aerich upgrade + +# View migration history +aerich history + +# Rollback last migration +aerich downgrade +``` + +See [docs/deployment.md](docs/deployment.md) for detailed migration workflows. + +## LangChain Agent + +The conversational agent has access to two tools: + +1. **simba_search** - Query the vector store for Simba's documents + - Used for: Medical records, veterinary history, factual information + +2. **web_search** - Search the web via Tavily API + - Used for: Recent events, external knowledge, general questions + +The agent automatically selects the appropriate tool based on the user's query. + +## Authentication Flow + +``` +User → Authelia (OIDC) → Backend (JWT) → Frontend (localStorage) + ↓ + LLDAP +``` + +1. User clicks "Login" +2. Frontend redirects to Authelia +3. User authenticates via Authelia (backed by LLDAP) +4. Authelia redirects back with authorization code +5. Backend exchanges code for OIDC tokens +6. Backend issues JWT tokens +7. Frontend stores tokens in localStorage + +## Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Run tests and linting +5. 
Submit a pull request + +## Documentation + +- [Development Guide](docs/development.md) - Setup and development workflow +- [Deployment Guide](docs/deployment.md) - Deployment and migrations +- [Vector Store Guide](docs/VECTORSTORE.md) - Managing the vector database +- [Authentication Guide](docs/authentication.md) - OIDC and LDAP setup + +## License + +[Your License Here] + +## Acknowledgments + +- Built for Simba, the most important cat in the world 🐱 +- Powered by LangChain, ChromaDB, and the open-source community diff --git a/services/raggr/aerich_config.py b/aerich_config.py similarity index 100% rename from services/raggr/aerich_config.py rename to aerich_config.py diff --git a/services/raggr/app.py b/app.py similarity index 100% rename from services/raggr/app.py rename to app.py diff --git a/services/raggr/blueprints/__init__.py b/blueprints/__init__.py similarity index 100% rename from services/raggr/blueprints/__init__.py rename to blueprints/__init__.py diff --git a/services/raggr/blueprints/conversation/__init__.py b/blueprints/conversation/__init__.py similarity index 100% rename from services/raggr/blueprints/conversation/__init__.py rename to blueprints/conversation/__init__.py diff --git a/services/raggr/blueprints/conversation/agents.py b/blueprints/conversation/agents.py similarity index 100% rename from services/raggr/blueprints/conversation/agents.py rename to blueprints/conversation/agents.py diff --git a/services/raggr/blueprints/conversation/logic.py b/blueprints/conversation/logic.py similarity index 100% rename from services/raggr/blueprints/conversation/logic.py rename to blueprints/conversation/logic.py diff --git a/services/raggr/blueprints/conversation/models.py b/blueprints/conversation/models.py similarity index 100% rename from services/raggr/blueprints/conversation/models.py rename to blueprints/conversation/models.py diff --git a/services/raggr/blueprints/rag/__init__.py b/blueprints/rag/__init__.py similarity index 100% rename from 
services/raggr/blueprints/rag/__init__.py rename to blueprints/rag/__init__.py diff --git a/services/raggr/blueprints/rag/fetchers.py b/blueprints/rag/fetchers.py similarity index 100% rename from services/raggr/blueprints/rag/fetchers.py rename to blueprints/rag/fetchers.py diff --git a/services/raggr/blueprints/rag/logic.py b/blueprints/rag/logic.py similarity index 100% rename from services/raggr/blueprints/rag/logic.py rename to blueprints/rag/logic.py diff --git a/services/raggr/blueprints/rag/models.py b/blueprints/rag/models.py similarity index 100% rename from services/raggr/blueprints/rag/models.py rename to blueprints/rag/models.py diff --git a/services/raggr/blueprints/users/__init__.py b/blueprints/users/__init__.py similarity index 99% rename from services/raggr/blueprints/users/__init__.py rename to blueprints/users/__init__.py index 69073c9..5412c50 100644 --- a/services/raggr/blueprints/users/__init__.py +++ b/blueprints/users/__init__.py @@ -7,7 +7,7 @@ from quart_jwt_extended import ( ) from .models import User from .oidc_service import OIDCUserService -from oidc_config import oidc_config +from config.oidc_config import oidc_config import secrets import httpx from urllib.parse import urlencode diff --git a/services/raggr/blueprints/users/decorators.py b/blueprints/users/decorators.py similarity index 100% rename from services/raggr/blueprints/users/decorators.py rename to blueprints/users/decorators.py diff --git a/services/raggr/blueprints/users/models.py b/blueprints/users/models.py similarity index 100% rename from services/raggr/blueprints/users/models.py rename to blueprints/users/models.py diff --git a/services/raggr/blueprints/users/oidc_service.py b/blueprints/users/oidc_service.py similarity index 100% rename from services/raggr/blueprints/users/oidc_service.py rename to blueprints/users/oidc_service.py diff --git a/classifier.py b/classifier.py deleted file mode 100644 index e49886a..0000000 --- a/classifier.py +++ /dev/null @@ -1,13 
+0,0 @@ -import os - -from llm import LLMClient - -USE_OPENAI = os.getenv("OLLAMA_URL") - - -class Classifier: - def __init__(self): - self.llm_client = LLMClient() - - def classify_query_by_action(self, query): - _prompt = "Classify the query into one of the following options: " diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/raggr/oidc_config.py b/config/oidc_config.py similarity index 100% rename from services/raggr/oidc_config.py rename to config/oidc_config.py diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 9227d7a..29feb0b 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -15,53 +15,56 @@ services: timeout: 5s retries: 5 - raggr: - build: - context: ./services/raggr - dockerfile: Dockerfile.dev - image: torrtle/simbarag:dev - ports: - - "8080:8080" - env_file: - - .env - environment: - - PAPERLESS_TOKEN=${PAPERLESS_TOKEN} - - BASE_URL=${BASE_URL} - - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434} - - CHROMADB_PATH=/app/data/chromadb - - OPENAI_API_KEY=${OPENAI_API_KEY} - - JWT_SECRET_KEY=${JWT_SECRET_KEY} - - OIDC_ISSUER=${OIDC_ISSUER} - - OIDC_CLIENT_ID=${OIDC_CLIENT_ID} - - OIDC_CLIENT_SECRET=${OIDC_CLIENT_SECRET} - - OIDC_REDIRECT_URI=${OIDC_REDIRECT_URI} - - OIDC_USE_DISCOVERY=${OIDC_USE_DISCOVERY:-true} - - DATABASE_URL=postgres://raggr:raggr_dev_password@postgres:5432/raggr - - FLASK_ENV=development - - PYTHONUNBUFFERED=1 - - NODE_ENV=development - - TAVILY_KEY=${TAVILIY_KEY} - depends_on: - postgres: - condition: service_healthy - volumes: - - chromadb_data:/app/data/chromadb - - ./services/raggr/migrations:/app/migrations # Bind mount for migrations (bidirectional) - develop: - watch: - # Sync+restart on any file change under services/raggr - - action: sync+restart - path: ./services/raggr - target: /app - ignore: - - __pycache__/ - - "*.pyc" - - "*.pyo" - - "*.pyd" - - .git/ - - chromadb/ - - node_modules/ - - raggr-frontend/dist/ + # raggr 
service disabled - run locally for development + # raggr: + # build: + # context: . + # dockerfile: Dockerfile.dev + # image: torrtle/simbarag:dev + # ports: + # - "8080:8080" + # env_file: + # - .env + # environment: + # - PAPERLESS_TOKEN=${PAPERLESS_TOKEN} + # - BASE_URL=${BASE_URL} + # - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434} + # - CHROMADB_PATH=/app/data/chromadb + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - JWT_SECRET_KEY=${JWT_SECRET_KEY} + # - OIDC_ISSUER=${OIDC_ISSUER} + # - OIDC_CLIENT_ID=${OIDC_CLIENT_ID} + # - OIDC_CLIENT_SECRET=${OIDC_CLIENT_SECRET} + # - OIDC_REDIRECT_URI=${OIDC_REDIRECT_URI} + # - OIDC_USE_DISCOVERY=${OIDC_USE_DISCOVERY:-true} + # - DATABASE_URL=postgres://raggr:raggr_dev_password@postgres:5432/raggr + # - FLASK_ENV=development + # - PYTHONUNBUFFERED=1 + # - NODE_ENV=development + # - TAVILY_KEY=${TAVILY_KEY} + # depends_on: + # postgres: + # condition: service_healthy + # volumes: + # - chromadb_data:/app/data/chromadb + # - ./migrations:/app/migrations # Bind mount for migrations (bidirectional) + # develop: + # watch: + # # Sync+restart on any file change in root directory + # - action: sync+restart + # path: . + # target: /app + # ignore: + # - __pycache__/ + # - "*.pyc" + # - "*.pyo" + # - "*.pyd" + # - .git/ + # - chromadb/ + # - node_modules/ + # - raggr-frontend/dist/ + # - docs/ + # - .venv/ volumes: chromadb_data: diff --git a/docker-compose.yml b/docker-compose.yml index 5d03d56..b9cd7b8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,7 +20,7 @@ services: raggr: build: - context: ./services/raggr + context: . 
dockerfile: Dockerfile image: torrtle/simbarag:latest ports: diff --git a/services/raggr/MIGRATIONS.md b/docs/MIGRATIONS.md similarity index 100% rename from services/raggr/MIGRATIONS.md rename to docs/MIGRATIONS.md diff --git a/services/raggr/VECTORSTORE.md b/docs/VECTORSTORE.md similarity index 75% rename from services/raggr/VECTORSTORE.md rename to docs/VECTORSTORE.md index 645f4f0..616ed69 100644 --- a/services/raggr/VECTORSTORE.md +++ b/docs/VECTORSTORE.md @@ -13,21 +13,21 @@ The vector store location is controlled by the `CHROMADB_PATH` environment varia ### CLI (Command Line) -Use the `manage_vectorstore.py` script for vector store operations: +Use the `scripts/manage_vectorstore.py` script for vector store operations: ```bash # Show statistics -python manage_vectorstore.py stats +python scripts/manage_vectorstore.py stats # Index documents from Paperless-NGX (incremental) -python manage_vectorstore.py index +python scripts/manage_vectorstore.py index # Clear and reindex all documents -python manage_vectorstore.py reindex +python scripts/manage_vectorstore.py reindex # List documents -python manage_vectorstore.py list 10 -python manage_vectorstore.py list 20 --show-content +python scripts/manage_vectorstore.py list 10 +python scripts/manage_vectorstore.py list 20 --show-content ``` ### Docker @@ -36,10 +36,10 @@ Run commands inside the Docker container: ```bash # Show statistics -docker compose -f docker-compose.dev.yml exec -T raggr python manage_vectorstore.py stats +docker compose exec raggr python scripts/manage_vectorstore.py stats # Reindex all documents -docker compose -f docker-compose.dev.yml exec -T raggr python manage_vectorstore.py reindex +docker compose exec raggr python scripts/manage_vectorstore.py reindex ``` ### API Endpoints @@ -65,7 +65,7 @@ The following authenticated endpoints are available: This indicates a corrupted index. 
Solution: ```bash -python manage_vectorstore.py reindex +python scripts/manage_vectorstore.py reindex ``` ### Empty results @@ -73,20 +73,20 @@ python manage_vectorstore.py reindex Check if documents are indexed: ```bash -python manage_vectorstore.py stats +python scripts/manage_vectorstore.py stats ``` If count is 0, run: ```bash -python manage_vectorstore.py index +python scripts/manage_vectorstore.py index ``` ### Different results in Docker vs local Docker and local environments use separate ChromaDB instances. To sync: -1. Index inside Docker: `docker compose exec -T raggr python manage_vectorstore.py reindex` +1. Index inside Docker: `docker compose exec raggr python scripts/manage_vectorstore.py reindex` 2. Or mount the same volume for both environments ## Production Considerations diff --git a/docs/deployment.md b/docs/deployment.md index 620075f..1f37b4f 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -170,11 +170,12 @@ docker compose exec raggr bash -c "sleep 5 && aerich upgrade" | File | Purpose | |------|---------| -| `services/raggr/pyproject.toml` | Aerich config (`[tool.aerich]` section) | -| `services/raggr/migrations/models/` | Migration files | -| `services/raggr/startup.sh` | Production startup (runs `aerich upgrade`) | -| `services/raggr/startup-dev.sh` | Dev startup (runs `aerich upgrade` or `init-db`) | -| `services/raggr/app.py` | Contains `TORTOISE_CONFIG` | +| `pyproject.toml` | Aerich config (`[tool.aerich]` section) | +| `migrations/models/` | Migration files | +| `startup.sh` | Production startup (runs `aerich upgrade`) | +| `startup-dev.sh` | Dev startup (runs `aerich upgrade` or `init-db`) | +| `app.py` | Contains `TORTOISE_CONFIG` | +| `aerich_config.py` | Aerich initialization configuration | ## Quick Reference diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 0000000..fde26ce --- /dev/null +++ b/docs/development.md @@ -0,0 +1,258 @@ +# Development Guide + +This guide explains how to run 
SimbaRAG in development mode. + +## Quick Start + +### Option 1: Local Development (Recommended) + +Run PostgreSQL in Docker and the application locally for faster iteration: + +```bash +# 1. Start PostgreSQL +docker compose -f docker-compose.dev.yml up -d + +# 2. Set environment variables +export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr" +export CHROMADB_PATH="./chromadb" +export $(grep -v '^#' .env | xargs) # Load other vars from .env + +# 3. Install dependencies (first time) +pip install -r requirements.txt +cd raggr-frontend && yarn install && yarn build && cd .. + +# 4. Run migrations +aerich upgrade + +# 5. Start the server +python app.py +``` + +The application will be available at `http://localhost:8080`. + +### Option 2: Full Docker Development + +Run everything in Docker with hot reload (slower, but matches production): + +```bash +# Uncomment the raggr service in docker-compose.dev.yml first! + +# Start all services +docker compose -f docker-compose.dev.yml up --build + +# View logs +docker compose -f docker-compose.dev.yml logs -f raggr +``` + +## Project Structure + +``` +raggr/ +├── app.py # Quart application entry point +├── main.py # RAG logic and LangChain agent +├── llm.py # LLM client (Ollama + OpenAI fallback) +├── aerich_config.py # Database migration configuration +│ +├── blueprints/ # API route blueprints +│ ├── users/ # Authentication (OIDC, JWT, RBAC) +│ ├── conversation/ # Chat conversations and messages +│ └── rag/ # Document indexing (admin only) +│ +├── config/ # Configuration modules +│ └── oidc_config.py # OIDC authentication settings +│ +├── utils/ # Reusable utilities +│ ├── chunker.py # Document chunking for embeddings +│ ├── cleaner.py # PDF cleaning and summarization +│ ├── image_process.py # Image description with LLM +│ └── request.py # Paperless-NGX API client +│ +├── scripts/ # Administrative scripts +│ ├── add_user.py # Create users manually +│ ├── user_message_stats.py # User message statistics 
+│ ├── manage_vectorstore.py # Vector store management +│ ├── inspect_vector_store.py # Inspect ChromaDB contents +│ └── query.py # Query generation utilities +│ +├── raggr-frontend/ # React frontend +│ └── src/ # Frontend source code +│ +├── migrations/ # Database migrations +└── docs/ # Documentation +``` + +## Making Changes + +### Backend Changes + +**Local development:** +1. Edit Python files +2. Save +3. Restart `python app.py` (or use a tool like `watchdog` for auto-reload) + +**Docker development:** +1. Edit Python files +2. Files are synced via Docker watch mode +3. Container automatically restarts + +### Frontend Changes + +```bash +cd raggr-frontend + +# Development mode with hot reload +yarn dev + +# Production build (for testing) +yarn build +``` + +The backend serves built files from `raggr-frontend/dist/`. + +### Database Model Changes + +When you modify Tortoise ORM models: + +```bash +# Generate migration +aerich migrate --name "describe_your_change" + +# Apply migration +aerich upgrade + +# View history +aerich history +``` + +See [deployment.md](deployment.md) for detailed migration workflows. + +### Adding Dependencies + +**Backend:** +```bash +# Add to requirements.txt or use uv +pip install package-name +pip freeze > requirements.txt +``` + +**Frontend:** +```bash +cd raggr-frontend +yarn add package-name +``` + +## Useful Commands + +### Database + +```bash +# Connect to PostgreSQL +docker compose -f docker-compose.dev.yml exec postgres psql -U raggr -d raggr + +# Reset database +docker compose -f docker-compose.dev.yml down -v +docker compose -f docker-compose.dev.yml up -d +aerich init-db +``` + +### Vector Store + +```bash +# Show statistics +python scripts/manage_vectorstore.py stats + +# Index new documents from Paperless +python scripts/manage_vectorstore.py index + +# Clear and reindex everything +python scripts/manage_vectorstore.py reindex +``` + +See [VECTORSTORE.md](VECTORSTORE.md) for details. 
+ +### Scripts + +```bash +# Add a new user +python scripts/add_user.py + +# View message statistics +python scripts/user_message_stats.py + +# Inspect vector store contents +python scripts/inspect_vector_store.py +``` + +## Environment Variables + +Copy `.env.example` to `.env` and configure: + +| Variable | Description | Example | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection | `postgres://user:pass@localhost:5432/db` | +| `CHROMADB_PATH` | ChromaDB storage path | `./chromadb` | +| `OLLAMA_URL` | Ollama server URL | `http://localhost:11434` | +| `OPENAI_API_KEY` | OpenAI API key (fallback LLM) | `sk-...` | +| `PAPERLESS_TOKEN` | Paperless-NGX API token | `...` | +| `BASE_URL` | Paperless-NGX URL | `https://paperless.example.com` | +| `OIDC_ISSUER` | OIDC provider URL | `https://auth.example.com` | +| `OIDC_CLIENT_ID` | OIDC client ID | `simbarag` | +| `OIDC_CLIENT_SECRET` | OIDC client secret | `...` | +| `JWT_SECRET_KEY` | JWT signing key | `random-secret` | +| `TAVILY_KEY` | Tavily web search API key | `tvly-...` | + +## Troubleshooting + +### Port Already in Use + +```bash +# Find and kill process on port 8080 +lsof -ti:8080 | xargs kill -9 + +# Or change the port in app.py +``` + +### Database Connection Errors + +```bash +# Check if PostgreSQL is running +docker compose -f docker-compose.dev.yml ps postgres + +# View PostgreSQL logs +docker compose -f docker-compose.dev.yml logs postgres +``` + +### Frontend Not Building + +```bash +cd raggr-frontend +rm -rf node_modules dist +yarn install +yarn build +``` + +### ChromaDB Errors + +```bash +# Clear and recreate ChromaDB +rm -rf chromadb/ +python scripts/manage_vectorstore.py reindex +``` + +### Import Errors After Reorganization + +Ensure you're in the project root directory when running scripts, or use: + +```bash +# Add project root to Python path +export PYTHONPATH="${PYTHONPATH}:$(pwd)" +python scripts/your_script.py +``` + +## Hot Tips + +- Use `python -m pdb app.py` 
for debugging +- Enable Quart debug mode in `app.py`: `app.run(debug=True)` +- Check API logs: They appear in the terminal running `python app.py` +- Frontend logs: Open browser DevTools console +- Use `docker compose -f docker-compose.dev.yml down -v` for a clean slate diff --git a/docs/index.md b/docs/index.md index 64d68aa..06730dd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,14 +1,203 @@ # SimbaRAG Documentation -SimbaRAG is a RAG-powered conversational AI system with enterprise authentication. +Welcome to the SimbaRAG documentation! This guide will help you understand, develop, and deploy the SimbaRAG conversational AI system. -## Architecture +## Getting Started -- **Backend**: Quart (async Python) with Tortoise ORM -- **Vector Store**: LangChain with configurable embeddings -- **Auth Stack**: LLDAP → Authelia → OAuth2/OIDC -- **Database**: PostgreSQL +New to SimbaRAG? Start here: -## Sections +1. Read the main [README](../README.md) for project overview and architecture +2. Follow the [Development Guide](development.md) to set up your environment +3. 
Learn about [Authentication](authentication.md) setup with OIDC and LDAP -- [Authentication](authentication.md) - OIDC flow, user management, and RBAC planning +## Documentation Structure + +### Core Guides + +- **[Development Guide](development.md)** - Local development setup, project structure, and workflows +- **[Deployment Guide](deployment.md)** - Database migrations, deployment workflows, and troubleshooting +- **[Vector Store Guide](VECTORSTORE.md)** - Managing ChromaDB, indexing documents, and RAG operations +- **[Migrations Guide](MIGRATIONS.md)** - Database migration reference +- **[Authentication Guide](authentication.md)** - OIDC, Authelia, LLDAP configuration and user management + +### Quick Reference + +| Task | Documentation | +|------|---------------| +| Set up local dev environment | [Development Guide → Quick Start](development.md#quick-start) | +| Run database migrations | [Deployment Guide → Migration Workflow](deployment.md#migration-workflow) | +| Index documents | [Vector Store Guide → Management Commands](VECTORSTORE.md#management-commands) | +| Configure authentication | [Authentication Guide](authentication.md) | +| Run administrative scripts | [Development Guide → Scripts](development.md#scripts) | + +## Common Tasks + +### Development + +```bash +# Start local development +docker compose -f docker-compose.dev.yml up -d +export DATABASE_URL="postgres://raggr:raggr_dev_password@localhost:5432/raggr" +export CHROMADB_PATH="./chromadb" +python app.py +``` + +### Database Migrations + +```bash +# Generate migration +aerich migrate --name "your_change" + +# Apply migrations +aerich upgrade + +# View history +aerich history +``` + +### Vector Store Management + +```bash +# Show statistics +python scripts/manage_vectorstore.py stats + +# Index new documents +python scripts/manage_vectorstore.py index + +# Reindex everything +python scripts/manage_vectorstore.py reindex +``` + +## Architecture Overview + +SimbaRAG is built with: + +- **Backend**: 
Quart (async Python), LangChain, Tortoise ORM +- **Frontend**: React 19, Rsbuild, Tailwind CSS +- **Database**: PostgreSQL (users, conversations) +- **Vector Store**: ChromaDB (document embeddings) +- **LLM**: Ollama (primary), OpenAI (fallback) +- **Auth**: Authelia (OIDC), LLDAP (user directory) + +See the [README](../README.md#system-architecture) for a detailed architecture diagram. + +## Project Structure + +``` +simbarag/ +├── app.py # Quart app entry point +├── main.py # RAG & LangChain agent +├── llm.py # LLM client +├── blueprints/ # API routes +├── config/ # Configuration +├── utils/ # Utilities +├── scripts/ # Admin scripts +├── raggr-frontend/ # React UI +├── migrations/ # Database migrations +├── docs/ # This documentation +├── docker-compose.yml # Production Docker setup +└── docker-compose.dev.yml # Development Docker setup +``` + +## Key Concepts + +### RAG (Retrieval-Augmented Generation) + +SimbaRAG uses RAG to answer questions about Simba: + +1. Documents are fetched from Paperless-NGX +2. Documents are chunked and embedded using OpenAI +3. Embeddings are stored in ChromaDB +4. User queries are embedded and matched against the store +5. Relevant chunks are passed to the LLM for context +6. The LLM generates an answer using the retrieved context + +### LangChain Agent + +The conversational agent has two tools: + +- **simba_search**: Queries the vector store for Simba's documents +- **web_search**: Searches the web via the Tavily API + +The agent automatically selects tools based on the query. + +### Authentication Flow + +1. User initiates OIDC login via Authelia +2. Authelia authenticates against LLDAP +3. Backend receives OIDC tokens and issues a JWT +4. Frontend stores the JWT in localStorage +5. 
Subsequent requests use the JWT for authorization + +## Environment Variables + +Key environment variables (see `.env.example` for the complete list): + +| Variable | Purpose | +|----------|---------| +| `DATABASE_URL` | PostgreSQL connection | +| `CHROMADB_PATH` | Vector store location | +| `OLLAMA_URL` | Local LLM server | +| `OPENAI_API_KEY` | OpenAI for embeddings/fallback | +| `PAPERLESS_TOKEN` | Document source API | +| `OIDC_*` | Authentication configuration | +| `TAVILY_KEY` | Web search API | + +## API Endpoints + +### Authentication +- `GET /api/user/oidc/login` - Start OIDC flow +- `GET /api/user/oidc/callback` - OIDC callback +- `POST /api/user/refresh` - Refresh JWT + +### Conversations +- `POST /api/conversation/` - Create conversation +- `GET /api/conversation/` - List conversations +- `POST /api/conversation/query` - Chat message + +### RAG (Admin Only) +- `GET /api/rag/stats` - Vector store stats +- `POST /api/rag/index` - Index documents +- `POST /api/rag/reindex` - Reindex all + +## Troubleshooting + +### Common Issues + +| Issue | Solution | +|-------|----------| +| Port already in use | Check if services are running: `lsof -ti:8080` | +| Database connection error | Ensure PostgreSQL is running: `docker compose ps` | +| ChromaDB errors | Clear and reindex: `python scripts/manage_vectorstore.py reindex` | +| Import errors | Check you're in the project root directory | +| Frontend not building | `cd raggr-frontend && yarn install && yarn build` | + +See individual guides for detailed troubleshooting. + +## Contributing + +1. Read the [Development Guide](development.md) +2. Set up your local environment +3. Make changes and test locally +4. Generate migrations if needed +5. 
Submit a pull request + +## Additional Resources + +- [LangChain Documentation](https://python.langchain.com/) +- [ChromaDB Documentation](https://docs.trychroma.com/) +- [Quart Documentation](https://quart.palletsprojects.com/) +- [Tortoise ORM Documentation](https://tortoise.github.io/) +- [Authelia Documentation](https://www.authelia.com/) + +## Need Help? + +- Check the relevant guide in this documentation +- Review troubleshooting sections +- Check application logs: `docker compose logs -f` +- Inspect database: `docker compose exec postgres psql -U raggr` + +--- + +**Documentation Version**: 1.0 +**Last Updated**: January 2026 diff --git a/services/raggr/llm.py b/llm.py similarity index 100% rename from services/raggr/llm.py rename to llm.py diff --git a/services/raggr/main.py b/main.py similarity index 97% rename from services/raggr/main.py rename to main.py index 838a072..63f7dd1 100644 --- a/services/raggr/main.py +++ b/main.py @@ -9,11 +9,11 @@ import ollama from dotenv import load_dotenv import chromadb -from chunker import Chunker -from cleaner import pdf_to_image, summarize_pdf_image +from utils.chunker import Chunker +from utils.cleaner import pdf_to_image, summarize_pdf_image from llm import LLMClient -from query import QueryGenerator -from request import PaperlessNGXService +from scripts.query import QueryGenerator +from utils.request import PaperlessNGXService _dotenv_loaded = load_dotenv() diff --git a/services/raggr/migrations/models/1_20260131214411_None.py b/migrations/models/1_20260131214411_None.py similarity index 100% rename from services/raggr/migrations/models/1_20260131214411_None.py rename to migrations/models/1_20260131214411_None.py diff --git a/services/raggr/pyproject.toml b/pyproject.toml similarity index 100% rename from services/raggr/pyproject.toml rename to pyproject.toml diff --git a/services/raggr/raggr-frontend/.dockerignore b/raggr-frontend/.dockerignore similarity index 100% rename from 
services/raggr/raggr-frontend/.dockerignore rename to raggr-frontend/.dockerignore diff --git a/services/raggr/raggr-frontend/.gitignore b/raggr-frontend/.gitignore similarity index 100% rename from services/raggr/raggr-frontend/.gitignore rename to raggr-frontend/.gitignore diff --git a/services/raggr/raggr-frontend/.yarnrc.yml b/raggr-frontend/.yarnrc.yml similarity index 100% rename from services/raggr/raggr-frontend/.yarnrc.yml rename to raggr-frontend/.yarnrc.yml diff --git a/services/raggr/raggr-frontend/Dockerfile.dev b/raggr-frontend/Dockerfile.dev similarity index 100% rename from services/raggr/raggr-frontend/Dockerfile.dev rename to raggr-frontend/Dockerfile.dev diff --git a/services/raggr/raggr-frontend/README.md b/raggr-frontend/README.md similarity index 100% rename from services/raggr/raggr-frontend/README.md rename to raggr-frontend/README.md diff --git a/services/raggr/raggr-frontend/TOKEN_REFRESH_IMPLEMENTATION.md b/raggr-frontend/TOKEN_REFRESH_IMPLEMENTATION.md similarity index 100% rename from services/raggr/raggr-frontend/TOKEN_REFRESH_IMPLEMENTATION.md rename to raggr-frontend/TOKEN_REFRESH_IMPLEMENTATION.md diff --git a/services/raggr/raggr-frontend/package-lock.json b/raggr-frontend/package-lock.json similarity index 100% rename from services/raggr/raggr-frontend/package-lock.json rename to raggr-frontend/package-lock.json diff --git a/services/raggr/raggr-frontend/package.json b/raggr-frontend/package.json similarity index 100% rename from services/raggr/raggr-frontend/package.json rename to raggr-frontend/package.json diff --git a/services/raggr/raggr-frontend/postcss.config.mjs b/raggr-frontend/postcss.config.mjs similarity index 100% rename from services/raggr/raggr-frontend/postcss.config.mjs rename to raggr-frontend/postcss.config.mjs diff --git a/services/raggr/raggr-frontend/rsbuild.config.ts b/raggr-frontend/rsbuild.config.ts similarity index 100% rename from services/raggr/raggr-frontend/rsbuild.config.ts rename to 
raggr-frontend/rsbuild.config.ts diff --git a/services/raggr/raggr-frontend/src/App.css b/raggr-frontend/src/App.css similarity index 100% rename from services/raggr/raggr-frontend/src/App.css rename to raggr-frontend/src/App.css diff --git a/services/raggr/raggr-frontend/src/App.tsx b/raggr-frontend/src/App.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/App.tsx rename to raggr-frontend/src/App.tsx diff --git a/services/raggr/raggr-frontend/src/api/conversationService.ts b/raggr-frontend/src/api/conversationService.ts similarity index 100% rename from services/raggr/raggr-frontend/src/api/conversationService.ts rename to raggr-frontend/src/api/conversationService.ts diff --git a/services/raggr/raggr-frontend/src/api/oidcService.ts b/raggr-frontend/src/api/oidcService.ts similarity index 100% rename from services/raggr/raggr-frontend/src/api/oidcService.ts rename to raggr-frontend/src/api/oidcService.ts diff --git a/services/raggr/raggr-frontend/src/api/userService.ts b/raggr-frontend/src/api/userService.ts similarity index 100% rename from services/raggr/raggr-frontend/src/api/userService.ts rename to raggr-frontend/src/api/userService.ts diff --git a/services/raggr/raggr-frontend/src/assets/cat.png b/raggr-frontend/src/assets/cat.png similarity index 100% rename from services/raggr/raggr-frontend/src/assets/cat.png rename to raggr-frontend/src/assets/cat.png diff --git a/services/raggr/raggr-frontend/src/assets/favicon.svg b/raggr-frontend/src/assets/favicon.svg similarity index 100% rename from services/raggr/raggr-frontend/src/assets/favicon.svg rename to raggr-frontend/src/assets/favicon.svg diff --git a/services/raggr/raggr-frontend/src/components/AnswerBubble.tsx b/raggr-frontend/src/components/AnswerBubble.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/AnswerBubble.tsx rename to raggr-frontend/src/components/AnswerBubble.tsx diff --git a/services/raggr/raggr-frontend/src/components/ChatScreen.tsx 
b/raggr-frontend/src/components/ChatScreen.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/ChatScreen.tsx rename to raggr-frontend/src/components/ChatScreen.tsx diff --git a/services/raggr/raggr-frontend/src/components/ConversationList.tsx b/raggr-frontend/src/components/ConversationList.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/ConversationList.tsx rename to raggr-frontend/src/components/ConversationList.tsx diff --git a/services/raggr/raggr-frontend/src/components/ConversationMenu.tsx b/raggr-frontend/src/components/ConversationMenu.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/ConversationMenu.tsx rename to raggr-frontend/src/components/ConversationMenu.tsx diff --git a/services/raggr/raggr-frontend/src/components/LoginScreen.tsx b/raggr-frontend/src/components/LoginScreen.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/LoginScreen.tsx rename to raggr-frontend/src/components/LoginScreen.tsx diff --git a/services/raggr/raggr-frontend/src/components/MessageInput.tsx b/raggr-frontend/src/components/MessageInput.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/MessageInput.tsx rename to raggr-frontend/src/components/MessageInput.tsx diff --git a/services/raggr/raggr-frontend/src/components/QuestionBubble.tsx b/raggr-frontend/src/components/QuestionBubble.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/components/QuestionBubble.tsx rename to raggr-frontend/src/components/QuestionBubble.tsx diff --git a/services/raggr/raggr-frontend/src/contexts/AuthContext.tsx b/raggr-frontend/src/contexts/AuthContext.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/contexts/AuthContext.tsx rename to raggr-frontend/src/contexts/AuthContext.tsx diff --git a/services/raggr/raggr-frontend/src/env.d.ts b/raggr-frontend/src/env.d.ts similarity index 100% rename 
from services/raggr/raggr-frontend/src/env.d.ts rename to raggr-frontend/src/env.d.ts diff --git a/services/raggr/raggr-frontend/src/index.tsx b/raggr-frontend/src/index.tsx similarity index 100% rename from services/raggr/raggr-frontend/src/index.tsx rename to raggr-frontend/src/index.tsx diff --git a/services/raggr/raggr-frontend/src/simba_cute.jpeg b/raggr-frontend/src/simba_cute.jpeg similarity index 100% rename from services/raggr/raggr-frontend/src/simba_cute.jpeg rename to raggr-frontend/src/simba_cute.jpeg diff --git a/services/raggr/raggr-frontend/src/simba_troll.jpeg b/raggr-frontend/src/simba_troll.jpeg similarity index 100% rename from services/raggr/raggr-frontend/src/simba_troll.jpeg rename to raggr-frontend/src/simba_troll.jpeg diff --git a/services/raggr/raggr-frontend/tsconfig.json b/raggr-frontend/tsconfig.json similarity index 100% rename from services/raggr/raggr-frontend/tsconfig.json rename to raggr-frontend/tsconfig.json diff --git a/services/raggr/raggr-frontend/yarn.lock b/raggr-frontend/yarn.lock similarity index 100% rename from services/raggr/raggr-frontend/yarn.lock rename to raggr-frontend/yarn.lock diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/raggr/add_user.py b/scripts/add_user.py similarity index 100% rename from services/raggr/add_user.py rename to scripts/add_user.py diff --git a/services/raggr/index_immich.py b/scripts/index_immich.py similarity index 93% rename from services/raggr/index_immich.py rename to scripts/index_immich.py index fbf7756..6d036e4 100644 --- a/services/raggr/index_immich.py +++ b/scripts/index_immich.py @@ -4,9 +4,14 @@ import sqlite3 import httpx from dotenv import load_dotenv +import sys +from pathlib import Path -from image_process import describe_simba_image -from request import PaperlessNGXService +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from utils.image_process 
import describe_simba_image +from utils.request import PaperlessNGXService logging.basicConfig(level=logging.INFO) diff --git a/services/raggr/inspect_vector_store.py b/scripts/inspect_vector_store.py similarity index 100% rename from services/raggr/inspect_vector_store.py rename to scripts/inspect_vector_store.py diff --git a/services/raggr/manage_vectorstore.py b/scripts/manage_vectorstore.py similarity index 100% rename from services/raggr/manage_vectorstore.py rename to scripts/manage_vectorstore.py diff --git a/services/raggr/petmd_scrape_index.py b/scripts/petmd_scrape_index.py similarity index 100% rename from services/raggr/petmd_scrape_index.py rename to scripts/petmd_scrape_index.py diff --git a/services/raggr/query.py b/scripts/query.py similarity index 100% rename from services/raggr/query.py rename to scripts/query.py diff --git a/services/raggr/test_query.py b/scripts/test_query.py similarity index 100% rename from services/raggr/test_query.py rename to scripts/test_query.py diff --git a/services/raggr/scripts/user_message_stats.py b/scripts/user_message_stats.py similarity index 100% rename from services/raggr/scripts/user_message_stats.py rename to scripts/user_message_stats.py diff --git a/services/raggr/.dockerignore b/services/raggr/.dockerignore deleted file mode 100644 index 33f4bd9..0000000 --- a/services/raggr/.dockerignore +++ /dev/null @@ -1,16 +0,0 @@ -.git -.gitignore -README.md -.env -.DS_Store -chromadb/ -chroma_db/ -raggr-frontend/node_modules/ -__pycache__/ -*.pyc -*.pyo -*.pyd -.Python -.venv/ -venv/ -.pytest_cache/ \ No newline at end of file diff --git a/services/raggr/.python-version b/services/raggr/.python-version deleted file mode 100644 index 24ee5b1..0000000 --- a/services/raggr/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git a/services/raggr/startup-dev.sh b/startup-dev.sh similarity index 100% rename from services/raggr/startup-dev.sh rename to startup-dev.sh diff --git a/services/raggr/startup.sh b/startup.sh 
similarity index 100% rename from services/raggr/startup.sh rename to startup.sh diff --git a/users.py b/users.py deleted file mode 100644 index c4f57cb..0000000 --- a/users.py +++ /dev/null @@ -1,12 +0,0 @@ -import sqlite3 - - -class User: - def __init__(self, email: str, password_hash: str): - self.email = email - self.is_authenticated - - -if __name__ == "__main__": - connection = sqlite3.connect("users.db") - c = connection.cursor() diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/raggr/chunker.py b/utils/chunker.py similarity index 100% rename from services/raggr/chunker.py rename to utils/chunker.py diff --git a/services/raggr/cleaner.py b/utils/cleaner.py similarity index 99% rename from services/raggr/cleaner.py rename to utils/cleaner.py index 764251f..4ea0f64 100644 --- a/services/raggr/cleaner.py +++ b/utils/cleaner.py @@ -8,7 +8,7 @@ import ollama from PIL import Image import fitz -from request import PaperlessNGXService +from .request import PaperlessNGXService load_dotenv() diff --git a/services/raggr/image_process.py b/utils/image_process.py similarity index 100% rename from services/raggr/image_process.py rename to utils/image_process.py diff --git a/services/raggr/request.py b/utils/request.py similarity index 100% rename from services/raggr/request.py rename to utils/request.py diff --git a/services/raggr/uv.lock b/uv.lock similarity index 100% rename from services/raggr/uv.lock rename to uv.lock