From 7cfad5babaa95b058bd2e614a442c7e95b4567cd Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Sat, 31 Jan 2026 16:20:35 -0500 Subject: [PATCH] Adding mkdocs and privileged tools --- docs/authentication.md | 274 ++++++++++++++++++ docs/index.md | 14 + index.html | 81 ++++++ mkdocs.yml | 25 ++ services/raggr/blueprints/rag/__init__.py | 9 +- services/raggr/blueprints/users/__init__.py | 14 +- services/raggr/blueprints/users/decorators.py | 26 ++ services/raggr/blueprints/users/models.py | 17 +- .../raggr/blueprints/users/oidc_service.py | 15 +- .../1_20260131000000_add_ldap_groups.py | 15 + 10 files changed, 476 insertions(+), 14 deletions(-) create mode 100644 docs/authentication.md create mode 100644 docs/index.md create mode 100644 index.html create mode 100644 mkdocs.yml create mode 100644 services/raggr/blueprints/users/decorators.py create mode 100644 services/raggr/migrations/models/1_20260131000000_add_ldap_groups.py diff --git a/docs/authentication.md b/docs/authentication.md new file mode 100644 index 0000000..a113e45 --- /dev/null +++ b/docs/authentication.md @@ -0,0 +1,274 @@ +# Authentication Architecture + +This document describes the authentication stack for SimbaRAG: LLDAP → Authelia → OAuth2/OIDC. 
+ +## Overview + +``` +┌─────────┐ ┌──────────┐ ┌──────────────┐ ┌──────────┐ +│ LLDAP │────▶│ Authelia │────▶│ OAuth2/OIDC │────▶│ SimbaRAG │ +│ (Users) │ │ (IdP) │ │ (Flow) │ │ (App) │ +└─────────┘ └──────────┘ └──────────────┘ └──────────┘ +``` + +| Component | Role | +|-----------|------| +| **LLDAP** | Lightweight LDAP server storing users and groups | +| **Authelia** | Identity provider that authenticates against LLDAP and issues OIDC tokens | +| **SimbaRAG** | Relying party that consumes OIDC tokens and manages sessions | + +## OIDC Configuration + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `OIDC_ISSUER` | Authelia server URL | Required | +| `OIDC_CLIENT_ID` | Client ID registered in Authelia | Required | +| `OIDC_CLIENT_SECRET` | Client secret for token exchange | Required | +| `OIDC_REDIRECT_URI` | Callback URL after authentication | Required | +| `OIDC_USE_DISCOVERY` | Enable automatic discovery | `true` | +| `JWT_SECRET_KEY` | Secret for signing backend JWTs | Required | + +### Discovery + +When `OIDC_USE_DISCOVERY=true`, the application fetches endpoints from: + +``` +{OIDC_ISSUER}/.well-known/openid-configuration +``` + +This provides: + +- Authorization endpoint +- Token endpoint +- JWKS URI for signature verification +- Supported scopes and claims + +## Authentication Flow + +### 1. Login Initiation + +``` +GET /api/user/oidc/login +``` + +1. Generate PKCE code verifier and challenge (S256) +2. Generate CSRF state token +3. Store state in session storage +4. Return authorization URL for frontend redirect + +### 2. Authorization + +User is redirected to Authelia where they: + +1. Enter LDAP credentials +2. Complete MFA if configured +3. Consent to requested scopes + +### 3. Callback + +``` +GET /api/user/oidc/callback?code=...&state=... +``` + +1. Validate state matches stored value (CSRF protection) +2. Exchange authorization code for tokens using PKCE verifier +3. 
Verify ID token signature using JWKS +4. Validate claims (issuer, audience, expiration) +5. Create or update user in database +6. Issue backend JWT tokens (access + refresh) + +### 4. Token Refresh + +``` +POST /api/user/refresh +Authorization: Bearer <refresh_token> +``` + +Issues a new access token without re-authentication. + +## User Model + +```python +class User(Model): + id = UUIDField(primary_key=True) + username = CharField(max_length=255) + password = BinaryField(null=True) # Nullable for OIDC-only users + email = CharField(max_length=100, unique=True) + + # OIDC fields + oidc_subject = CharField(max_length=255, unique=True, null=True) + auth_provider = CharField(max_length=50, default="local") # "local" or "oidc" + ldap_groups = JSONField(default=[]) # LDAP groups from OIDC claims + + created_at = DatetimeField(auto_now_add=True) + updated_at = DatetimeField(auto_now=True) + + def has_group(self, group: str) -> bool: + """Check if user belongs to a specific LDAP group.""" + return group in (self.ldap_groups or []) + + def is_admin(self) -> bool: + """Check if user is an admin (member of lldap_admin group).""" + return self.has_group("lldap_admin") +``` + +### User Provisioning + +The `OIDCUserService` handles automatic user creation: + +1. Extract claims from ID token (`sub`, `email`, `preferred_username`) +2. Check if user exists by `oidc_subject` +3. If not, check by email for migration from local auth +4. 
Create new user or update existing + +## JWT Tokens + +Backend issues its own JWTs after OIDC authentication: + +| Token Type | Purpose | Typical Lifetime | +|------------|---------|------------------| +| Access Token | API authorization | 15 minutes | +| Refresh Token | Obtain new access tokens | 7 days | + +### Claims + +```json +{ + "identity": "<user_id>", + "type": "access|refresh", + "exp": 1234567890, + "iat": 1234567890 +} +``` + +## Protected Endpoints + +All API endpoints use the `@jwt_refresh_token_required` decorator for basic authentication: + +```python +@blueprint.route("/example") +@jwt_refresh_token_required +async def protected_endpoint(): + user_id = get_jwt_identity() + # ... +``` + +--- + +## Role-Based Access Control (RBAC) + +RBAC is implemented using LDAP groups passed through Authelia as OIDC claims. Users in the `lldap_admin` group have admin privileges. + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ LLDAP │ +│ Groups: lldap_admin, lldap_user │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Authelia │ +│ Scope: groups → Claim: groups = ["lldap_admin"] │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ SimbaRAG │ +│ 1. Extract groups from ID token │ +│ 2. Store in User.ldap_groups │ +│ 3. 
Check membership with @admin_required decorator │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Authelia Configuration + +Ensure Authelia is configured to pass the `groups` claim: + +```yaml +identity_providers: + oidc: + clients: + - client_id: simbarag + scopes: + - openid + - profile + - email + - groups # Required for RBAC +``` + +### Admin-Only Endpoints + +The `@admin_required` decorator protects privileged endpoints: + +```python +from blueprints.users.decorators import admin_required + +@blueprint.post("/admin-action") +@admin_required +async def admin_only_endpoint(): + # Only users in lldap_admin group can access + ... +``` + +**Protected endpoints:** + +| Endpoint | Access | Description | +|----------|--------|-------------| +| `POST /api/rag/index` | Admin | Trigger document indexing | +| `POST /api/rag/reindex` | Admin | Clear and reindex all documents | +| `GET /api/rag/stats` | All users | View vector store statistics | + +### User Response + +The OIDC callback returns group information: + +```json +{ + "access_token": "...", + "refresh_token": "...", + "user": { + "id": "uuid", + "username": "john", + "email": "john@example.com", + "groups": ["lldap_admin", "lldap_user"], + "is_admin": true + } +} +``` + +--- + +## Security Considerations + +### Current Gaps + +| Issue | Risk | Mitigation | +|-------|------|------------| +| In-memory session storage | State lost on restart, not scalable | Use Redis for production | +| No token revocation | Tokens valid until expiry | Implement blacklist or short expiry | +| No audit logging | Cannot track auth events | Add event logging | +| Single JWT secret | Compromise affects all tokens | Rotate secrets, use asymmetric keys | + +### Recommendations + +1. **Use Redis** for OIDC state storage in production +2. **Implement logout** with token blacklisting +3. **Add audit logging** for authentication events +4. **Rotate JWT secrets** regularly +5. 
**Use short-lived access tokens** (15 min) with refresh + +--- + +## File Reference + +| File | Purpose | +|------|---------| +| `services/raggr/oidc_config.py` | OIDC client configuration and discovery | +| `services/raggr/blueprints/users/models.py` | User model definition with group helpers | +| `services/raggr/blueprints/users/oidc_service.py` | User provisioning from OIDC claims | +| `services/raggr/blueprints/users/__init__.py` | Auth endpoints and flow | +| `services/raggr/blueprints/users/decorators.py` | Auth decorators (`@admin_required`) | diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..64d68aa --- /dev/null +++ b/docs/index.md @@ -0,0 +1,14 @@ +# SimbaRAG Documentation + +SimbaRAG is a RAG-powered conversational AI system with enterprise authentication. + +## Architecture + +- **Backend**: Quart (async Python) with Tortoise ORM +- **Vector Store**: LangChain with configurable embeddings +- **Auth Stack**: LLDAP → Authelia → OAuth2/OIDC +- **Database**: PostgreSQL + +## Sections + +- [Authentication](authentication.md) - OIDC flow, user management, and role-based access control (RBAC) diff --git a/index.html b/index.html new file mode 100644 index 0000000..d481a70 --- /dev/null +++ b/index.html @@ -0,0 +1,81 @@ + + + + + + + + + + + + + Paperless-ngx sign in + + + + + + + +
+
+ + + + + + + +

+ Please sign in. + +

+ + + + + + + + +
+ + +
+
+ + +
+
+ +
+ + + +
+ + + + + +
+ + diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..b244c9f --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,25 @@ +site_name: SimbaRAG Documentation +site_description: Documentation for SimbaRAG - RAG-powered conversational AI + +theme: + name: material + features: + - content.code.copy + - navigation.sections + - navigation.expand + +markdown_extensions: + - admonition + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - tables + - toc: + permalink: true + +nav: + - Home: index.md + - Architecture: + - Authentication: authentication.md diff --git a/services/raggr/blueprints/rag/__init__.py b/services/raggr/blueprints/rag/__init__.py index d039af5..610539c 100644 --- a/services/raggr/blueprints/rag/__init__.py +++ b/services/raggr/blueprints/rag/__init__.py @@ -2,6 +2,7 @@ from quart import Blueprint, jsonify from quart_jwt_extended import jwt_refresh_token_required from .logic import get_vector_store_stats, index_documents, vector_store +from blueprints.users.decorators import admin_required rag_blueprint = Blueprint("rag_api", __name__, url_prefix="/api/rag") @@ -15,9 +16,9 @@ async def get_stats(): @rag_blueprint.post("/index") -@jwt_refresh_token_required +@admin_required async def trigger_index(): - """Trigger indexing of documents from Paperless-NGX.""" + """Trigger indexing of documents from Paperless-NGX. Admin only.""" try: await index_documents() stats = get_vector_store_stats() @@ -27,9 +28,9 @@ async def trigger_index(): @rag_blueprint.post("/reindex") -@jwt_refresh_token_required +@admin_required async def trigger_reindex(): - """Clear and reindex all documents.""" + """Clear and reindex all documents. 
Admin only.""" try: # Clear existing documents collection = vector_store._collection diff --git a/services/raggr/blueprints/users/__init__.py b/services/raggr/blueprints/users/__init__.py index e92933d..69073c9 100644 --- a/services/raggr/blueprints/users/__init__.py +++ b/services/raggr/blueprints/users/__init__.py @@ -60,7 +60,7 @@ async def oidc_login(): "client_id": oidc_config.client_id, "response_type": "code", "redirect_uri": oidc_config.redirect_uri, - "scope": "openid email profile", + "scope": "openid email profile groups", "state": state, "code_challenge": code_challenge, "code_challenge_method": "S256", @@ -115,7 +115,9 @@ async def oidc_callback(): token_response = await client.post(token_endpoint, data=token_data) if token_response.status_code != 200: - return jsonify({"error": f"Failed to exchange code for token: {token_response.text}"}), 400 + return jsonify( + {"error": f"Failed to exchange code for token: {token_response.text}"} + ), 400 tokens = token_response.json() @@ -141,7 +143,13 @@ async def oidc_callback(): return jsonify( access_token=access_token, refresh_token=refresh_token, - user={"id": str(user.id), "username": user.username, "email": user.email}, + user={ + "id": str(user.id), + "username": user.username, + "email": user.email, + "groups": user.ldap_groups, + "is_admin": user.is_admin(), + }, ) diff --git a/services/raggr/blueprints/users/decorators.py b/services/raggr/blueprints/users/decorators.py new file mode 100644 index 0000000..ac9a3a3 --- /dev/null +++ b/services/raggr/blueprints/users/decorators.py @@ -0,0 +1,26 @@ +""" +Authentication decorators for role-based access control. +""" + +from functools import wraps +from quart import jsonify +from quart_jwt_extended import jwt_refresh_token_required, get_jwt_identity +from .models import User + + +def admin_required(fn): + """ + Decorator that requires the user to be an admin (member of lldap_admin group). + Must be used on async route handlers. 
+ """ + + @wraps(fn) + @jwt_refresh_token_required + async def wrapper(*args, **kwargs): + user_id = get_jwt_identity() + user = await User.get_or_none(id=user_id) + if not user or not user.is_admin(): + return jsonify({"error": "Admin access required"}), 403 + return await fn(*args, **kwargs) + + return wrapper diff --git a/services/raggr/blueprints/users/models.py b/services/raggr/blueprints/users/models.py index 9b3f6be..51d86d0 100644 --- a/services/raggr/blueprints/users/models.py +++ b/services/raggr/blueprints/users/models.py @@ -12,8 +12,13 @@ class User(Model): email = fields.CharField(max_length=100, unique=True) # OIDC fields - oidc_subject = fields.CharField(max_length=255, unique=True, null=True, index=True) # "sub" claim from OIDC - auth_provider = fields.CharField(max_length=50, default="local") # "local" or "oidc" + oidc_subject = fields.CharField( + max_length=255, unique=True, null=True, index=True + ) # "sub" claim from OIDC + auth_provider = fields.CharField( + max_length=50, default="local" + ) # "local" or "oidc" + ldap_groups = fields.JSONField(default=[]) # LDAP groups from OIDC claims created_at = fields.DatetimeField(auto_now_add=True) updated_at = fields.DatetimeField(auto_now=True) @@ -21,6 +26,14 @@ class User(Model): class Meta: table = "users" + def has_group(self, group: str) -> bool: + """Check if user belongs to a specific LDAP group.""" + return group in (self.ldap_groups or []) + + def is_admin(self) -> bool: + """Check if user is an admin (member of lldap_admin group).""" + return self.has_group("lldap_admin") + def set_password(self, plain_password: str): self.password = bcrypt.hashpw( plain_password.encode("utf-8"), diff --git a/services/raggr/blueprints/users/oidc_service.py b/services/raggr/blueprints/users/oidc_service.py index d01441a..0344ffb 100644 --- a/services/raggr/blueprints/users/oidc_service.py +++ b/services/raggr/blueprints/users/oidc_service.py @@ -1,6 +1,7 @@ """ OIDC User Management Service """ + from typing 
import Dict, Any, Optional from uuid import uuid4 from .models import User @@ -31,10 +32,10 @@ class OIDCUserService: # Update user info from latest claims (optional) user.email = claims.get("email", user.email) user.username = ( - claims.get("preferred_username") - or claims.get("name") - or user.username + claims.get("preferred_username") or claims.get("name") or user.username ) + # Update LDAP groups from claims + user.ldap_groups = claims.get("groups", []) await user.save() return user @@ -47,6 +48,7 @@ class OIDCUserService: user.oidc_subject = oidc_subject user.auth_provider = "oidc" user.password = None # Clear password + user.ldap_groups = claims.get("groups", []) await user.save() return user @@ -58,14 +60,17 @@ class OIDCUserService: or f"user_{oidc_subject[:8]}" ) + # Extract LDAP groups from claims + groups = claims.get("groups", []) + user = await User.create( id=uuid4(), username=username, - email=email - or f"{oidc_subject}@oidc.local", # Fallback if no email claim + email=email or f"{oidc_subject}@oidc.local", # Fallback if no email claim oidc_subject=oidc_subject, auth_provider="oidc", password=None, + ldap_groups=groups, ) return user diff --git a/services/raggr/migrations/models/1_20260131000000_add_ldap_groups.py b/services/raggr/migrations/models/1_20260131000000_add_ldap_groups.py new file mode 100644 index 0000000..d385895 --- /dev/null +++ b/services/raggr/migrations/models/1_20260131000000_add_ldap_groups.py @@ -0,0 +1,15 @@ +from tortoise import BaseDBAsyncClient + +RUN_IN_TRANSACTION = True + + +async def upgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "users" ADD COLUMN "ldap_groups" JSONB DEFAULT '[]'; + """ + + +async def downgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "users" DROP COLUMN "ldap_groups"; + """