14 Commits

Author SHA1 Message Date
Ryan Chen
5054b4a859 Added conversation history 2025-10-23 22:28:41 -04:00
Ryan Chen
8479898cc4 Logging 2025-10-16 22:43:14 -04:00
Ryan Chen
acaf681927 Metadata filtering 2025-10-16 22:36:21 -04:00
Ryan Chen
2bbe33fedc Starting attempt #2 at metadata filtering 2025-10-14 22:13:01 -04:00
Ryan Chen
b872750444 Only use OpenAI for embedding 2025-10-14 20:06:32 -04:00
Ryan Chen
376baccadb message-style frontend 2025-10-10 23:28:41 -04:00
Ryan Chen
c978b1a255 Reducing startup time/cost 2025-10-08 23:21:22 -04:00
Ryan Chen
51b9932389 fixing loal llm 2025-10-08 22:52:49 -04:00
Ryan Chen
ebf39480b6 urf 2025-10-08 22:46:16 -04:00
Ryan Chen
e4a04331cb add some more debugging 2025-10-08 21:17:45 -04:00
Ryan Chen
166ffb4c09 i only ship bugs 2025-10-08 21:13:15 -04:00
Ryan Chen
64e286e623 oops 2025-10-08 21:07:33 -04:00
Ryan Chen
c6c14729dd interseting 2025-10-08 21:03:42 -04:00
Ryan Chen
910097d13b data 2025-10-05 20:31:46 -04:00
16 changed files with 993 additions and 237 deletions

88
app.py
View File

@@ -1,43 +1,101 @@
import os
from flask import Flask, request, jsonify, render_template, send_from_directory
from quart import Quart, request, jsonify, render_template, send_from_directory
from tortoise.contrib.quart import register_tortoise
from quart_jwt_extended import JWTManager
from main import consult_simba_oracle
from blueprints.conversation.logic import (
get_the_only_conversation,
add_message_to_conversation,
)
app = Flask(
app = Quart(
__name__,
static_folder="raggr-frontend/dist/static",
template_folder="raggr-frontend/dist",
)
app.config["JWT_SECRET_KEY"] = os.getenv("JWT_SECRET_KEY", "SECRET_KEY")
jwt = JWTManager(app)
# Initialize Tortoise ORM
register_tortoise(
app,
db_url=os.getenv("DATABASE_URL", "sqlite://raggr.db"),
modules={"models": ["blueprints.conversation.models"]},
generate_schemas=True,
)
# Serve React static files
@app.route("/static/<path:filename>")
def static_files(filename):
return send_from_directory(app.static_folder, filename)
async def static_files(filename):
return await send_from_directory(app.static_folder, filename)
# Serve the React app for all routes (catch-all)
@app.route("/", defaults={"path": ""})
@app.route("/<path:path>")
def serve_react_app(path):
async def serve_react_app(path):
if path and os.path.exists(os.path.join(app.template_folder, path)):
return send_from_directory(app.template_folder, path)
return render_template("index.html")
return await send_from_directory(app.template_folder, path)
return await render_template("index.html")
@app.route("/api/query", methods=["POST"])
def query():
data = request.get_json()
async def query():
data = await request.get_json()
query = data.get("query")
return jsonify({"response": consult_simba_oracle(query)})
# add message to database
conversation = await get_the_only_conversation()
print(conversation)
await add_message_to_conversation(
conversation=conversation, message=query, speaker="user"
)
response = consult_simba_oracle(query)
await add_message_to_conversation(
conversation=conversation, message=response, speaker="simba"
)
return jsonify({"response": response})
@app.route("/api/ingest", methods=["POST"])
def webhook():
data = request.get_json()
print(data)
return jsonify({"status": "received"})
@app.route("/api/messages", methods=["GET"])
async def get_messages():
conversation = await get_the_only_conversation()
# Prefetch related messages
await conversation.fetch_related("messages")
# Manually serialize the conversation with messages
messages = []
for msg in conversation.messages:
messages.append(
{
"id": str(msg.id),
"text": msg.text,
"speaker": msg.speaker.value,
"created_at": msg.created_at.isoformat(),
}
)
return jsonify(
{
"id": str(conversation.id),
"name": conversation.name,
"messages": messages,
"created_at": conversation.created_at.isoformat(),
"updated_at": conversation.updated_at.isoformat(),
}
)
# @app.route("/api/ingest", methods=["POST"])
# def webhook():
# data = request.get_json()
# print(data)
# return jsonify({"status": "received"})
if __name__ == "__main__":

View File

@@ -0,0 +1,17 @@
from quart import Blueprint, jsonify
from .models import (
Conversation,
PydConversation,
)
conversation_blueprint = Blueprint(
"conversation_api", __name__, url_prefix="/api/conversation"
)
@conversation_blueprint.route("/<conversation_id>")
async def get_conversation(conversation_id: str):
conversation = await Conversation.get(id=conversation_id)
serialized_conversation = await PydConversation.from_tortoise_orm(conversation)
return jsonify(serialized_conversation.model_dump_json())

View File

@@ -0,0 +1,32 @@
from .models import Conversation, ConversationMessage
async def create_conversation(name: str = "") -> Conversation:
conversation = await Conversation.create(name=name)
return conversation
async def add_message_to_conversation(
conversation: Conversation,
message: str,
speaker: str,
) -> ConversationMessage:
print(conversation, message, speaker)
message = await ConversationMessage.create(
text=message,
speaker=speaker,
conversation=conversation,
)
return message
async def get_the_only_conversation() -> Conversation:
try:
conversation = await Conversation.all().first()
if conversation is None:
conversation = await Conversation.create(name="simba_chat")
except Exception as _e:
conversation = await Conversation.create(name="simba_chat")
return conversation

View File

@@ -0,0 +1,41 @@
import enum
from tortoise.models import Model
from tortoise import fields
from tortoise.contrib.pydantic import (
pydantic_queryset_creator,
pydantic_model_creator,
)
class Speaker(enum.Enum):
USER = "user"
SIMBA = "simba"
class Conversation(Model):
id = fields.UUIDField(primary_key=True)
name = fields.CharField(max_length=255)
created_at = fields.DatetimeField(auto_now_add=True)
updated_at = fields.DatetimeField(auto_now=True)
class Meta:
table = "conversations"
class ConversationMessage(Model):
id = fields.UUIDField(primary_key=True)
text = fields.TextField()
conversation = fields.ForeignKeyField(
"models.Conversation", related_name="messages"
)
created_at = fields.DatetimeField(auto_now_add=True)
speaker = fields.CharEnumField(enum_type=Speaker, max_length=10)
class Meta:
table = "conversation_messages"
PydConversationMessage = pydantic_model_creator(ConversationMessage)
PydConversation = pydantic_model_creator(Conversation, name="Conversation")
PydListConversationMessage = pydantic_queryset_creator(ConversationMessage)

View File

@@ -3,15 +3,20 @@ from math import ceil
import re
from typing import Union
from uuid import UUID, uuid4
from ollama import Client
from chromadb.utils.embedding_functions.openai_embedding_function import (
OpenAIEmbeddingFunction,
)
from dotenv import load_dotenv
from llm import LLMClient
load_dotenv()
ollama_client = Client(
host=os.getenv("OLLAMA_HOST", "http://localhost:11434"), timeout=10.0
)
def remove_headers_footers(text, header_patterns=None, footer_patterns=None):
if header_patterns is None:
@@ -80,13 +85,16 @@ class Chunk:
class Chunker:
embedding_fx = OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"),
model_name="text-embedding-3-small",
)
def __init__(self, collection) -> None:
self.collection = collection
self.llm_client = LLMClient()
def embedding_fx(self, inputs):
openai_embedding_fx = OpenAIEmbeddingFunction(
api_key=os.getenv("OPENAI_API_KEY"),
model_name="text-embedding-3-small",
)
return openai_embedding_fx(inputs)
def chunk_document(
self,

View File

@@ -1,4 +1,4 @@
version: '3.8'
version: "3.8"
services:
raggr:

View File

@@ -25,22 +25,24 @@ parser.add_argument("filepath")
client = Client(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
class SimbaImageDescription(BaseModel):
image_date: str
description: str
def describe_simba_image(input):
logging.info("Opening image of Simba ...")
if "heic" in input.lower() or "heif" in input.lower():
new_filepath = input.split(".")[0] + ".jpg"
img = Image.open(input)
img.save(new_filepath, 'JPEG')
img.save(new_filepath, "JPEG")
logging.info("Extracting EXIF...")
exif = {
ExifTags.TAGS[k]: v for k, v in img.getexif().items() if k in ExifTags.TAGS
}
img = Image.open(new_filepath)
input=new_filepath
input = new_filepath
else:
img = Image.open(input)
@@ -66,7 +68,7 @@ def describe_simba_image(input):
},
{"role": "user", "content": prompt, "images": [input]},
],
format=SimbaImageDescription.model_json_schema()
format=SimbaImageDescription.model_json_schema(),
)
result = SimbaImageDescription.model_validate_json(response["message"]["content"])

View File

@@ -6,6 +6,7 @@ import tempfile
from image_process import describe_simba_image
from request import PaperlessNGXService
import sqlite3
logging.basicConfig(level=logging.INFO)
@@ -23,8 +24,16 @@ DOWNLOAD_DIR = os.getenv("DOWNLOAD_DIR", "./simba_photos")
# Set up headers
headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}
VISITED = {}
if __name__ == "__main__":
conn = sqlite3.connect("./visited.db")
c = conn.cursor()
c.execute("select immich_id from visited")
rows = c.fetchall()
for row in rows:
VISITED.add(row[0])
ppngx = PaperlessNGXService()
people_url = f"{IMMICH_URL}/api/search/person?name=Simba"
people = httpx.get(people_url, headers=headers).json()
@@ -39,7 +48,7 @@ if __name__ == "__main__":
assets = results.json()["assets"]
for asset in assets["items"]:
if asset["type"] == "IMAGE":
if asset["type"] == "IMAGE" and asset["id"] not in VISITED:
ids[asset["id"]] = asset.get("originalFileName")
nextPage = assets.get("nextPage")
@@ -58,41 +67,49 @@ if __name__ == "__main__":
asset_search = f"{IMMICH_URL}/api/search/smart"
request_body = {"query": "simba"}
results = httpx.post(asset_search, headers=headers, json=request_body)
print(results.json()["assets"]["total"])
for asset in results.json()["assets"]["items"]:
if asset["type"] == "IMAGE":
ids[asset["id"]] = asset.get("originalFileName")
immich_asset_id = list(ids.keys())[1]
immich_filename = ids.get(immich_asset_id)
response = httpx.get(
f"{IMMICH_URL}/api/assets/{immich_asset_id}/original", headers=headers
)
for immich_asset_id, immich_filename in ids.items():
try:
response = httpx.get(
f"{IMMICH_URL}/api/assets/{immich_asset_id}/original", headers=headers
)
path = os.path.join("/Users/ryanchen/Programs/raggr", immich_filename)
file = open(path, "wb+")
for chunk in response.iter_bytes(chunk_size=8192):
file.write(chunk)
path = os.path.join("/Users/ryanchen/Programs/raggr", immich_filename)
file = open(path, "wb+")
for chunk in response.iter_bytes(chunk_size=8192):
file.write(chunk)
logging.info("Processing image ...")
description = describe_simba_image(path)
logging.info("Processing image ...")
description = describe_simba_image(path)
image_description = description.description
image_date = description.image_date
image_description = description.description
image_date = description.image_date
description_filepath = os.path.join("/Users/ryanchen/Programs/raggr", f"SIMBA_DESCRIBE_001.txt")
file = open(description_filepath, "w+")
file.write(image_description)
file.close()
description_filepath = os.path.join(
"/Users/ryanchen/Programs/raggr", f"SIMBA_DESCRIBE_001.txt"
)
file = open(description_filepath, "w+")
file.write(image_description)
file.close()
file = open(description_filepath, 'rb')
file = open(description_filepath, "rb")
ppngx.upload_description(
description_filepath=description_filepath,
file=file,
title="SIMBA_DESCRIBE_001.txt",
exif_date=image_date,
)
file.close()
ppngx.upload_description(description_filepath=description_filepath, file=file, title="SIMBA_DESCRIBE_001.txt", exif_date=image_date)
c.execute("INSERT INTO visited (immich_id) values (?)", (immich_asset_id,))
conn.commit()
logging.info("Processing complete. Deleting file.")
os.remove(file.name)
except Exception as e:
logging.info(f"something went wrong for {immich_filename}")
logging.info(e)
file.close()
logging.info("Processing complete. Deleting file.")
os.remove(file.name)
conn.close()

64
llm.py Normal file
View File

@@ -0,0 +1,64 @@
import os
from ollama import Client
from openai import OpenAI
import logging
logging.basicConfig(level=logging.INFO)
class LLMClient:
def __init__(self):
try:
self.ollama_client = Client(
host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=10.0
)
self.ollama_client.chat(
model="gemma3:4b", messages=[{"role": "system", "content": "test"}]
)
self.PROVIDER = "ollama"
logging.info("Using Ollama as LLM backend")
except Exception as e:
print(e)
self.openai_client = OpenAI()
self.PROVIDER = "openai"
logging.info("Using OpenAI as LLM backend")
def chat(
self,
prompt: str,
system_prompt: str,
):
if self.PROVIDER == "ollama":
response = self.ollama_client.chat(
model="gemma3:4b",
messages=[
{
"role": "system",
"content": system_prompt,
},
{"role": "user", "content": prompt},
],
)
print(response)
output = response.message.content
elif self.PROVIDER == "openai":
response = self.openai_client.responses.create(
model="gpt-4o-mini",
input=[
{
"role": "system",
"content": system_prompt,
},
{"role": "user", "content": prompt},
],
)
output = response.output_text
return output
if __name__ == "__main__":
client = Client()
client.chat(model="gemma3:4b", messages=[{"role": "system", "promp": "hack"}])

188
main.py
View File

@@ -1,28 +1,31 @@
import datetime
import logging
import os
from typing import Any, Union
import sqlite3
import argparse
import chromadb
import ollama
from openai import OpenAI
from request import PaperlessNGXService
from chunker import Chunker
from query import QueryGenerator
from cleaner import pdf_to_image, summarize_pdf_image
from llm import LLMClient
from query import QueryGenerator
from dotenv import load_dotenv
load_dotenv()
_dotenv_loaded = load_dotenv()
# Configure ollama client with URL from environment or default to localhost
ollama_client = ollama.Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
ollama_client = ollama.Client(
host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=10.0
)
client = chromadb.PersistentClient(path=os.getenv("CHROMADB_PATH", ""))
simba_docs = client.get_or_create_collection(name="simba_docs")
simba_docs = client.get_or_create_collection(name="simba_docs2")
feline_vet_lookup = client.get_or_create_collection(name="feline_vet_lookup")
parser = argparse.ArgumentParser(
@@ -37,25 +40,25 @@ parser.add_argument("--index", help="index a file")
ppngx = PaperlessNGXService()
openai_client = OpenAI()
llm_client = LLMClient()
def index_using_pdf_llm():
def index_using_pdf_llm(doctypes):
logging.info("reindex data...")
files = ppngx.get_data()
for file in files:
document_id = file["id"]
document_id: int = file["id"]
pdf_path = ppngx.download_pdf_from_id(id=document_id)
image_paths = pdf_to_image(filepath=pdf_path)
print(f"summarizing {file}")
logging.info(f"summarizing {file}")
generated_summary = summarize_pdf_image(filepaths=image_paths)
file["content"] = generated_summary
chunk_data(files, simba_docs)
chunk_data(files, simba_docs, doctypes=doctypes)
def date_to_epoch(date_str: str) -> float:
split_date = date_str.split("-")
print(split_date)
date = datetime.datetime(
int(split_date[0]),
int(split_date[1]),
@@ -68,23 +71,35 @@ def date_to_epoch(date_str: str) -> float:
return date.timestamp()
def chunk_data(docs: list[dict[str, Union[str, Any]]], collection):
def chunk_data(docs, collection, doctypes):
# Step 2: Create chunks
chunker = Chunker(collection)
print(f"chunking {len(docs)} documents")
print(docs)
logging.info(f"chunking {len(docs)} documents")
texts: list[str] = [doc["content"] for doc in docs]
for index, text in enumerate(texts):
print(docs[index]["original_file_name"])
metadata = {
"created_date": date_to_epoch(docs[index]["created_date"]),
"filename": docs[index]["original_file_name"],
}
chunker.chunk_document(
document=text,
metadata=metadata,
with sqlite3.connect("visited.db") as conn:
to_insert = []
c = conn.cursor()
for index, text in enumerate(texts):
metadata = {
"created_date": date_to_epoch(docs[index]["created_date"]),
"filename": docs[index]["original_file_name"],
"document_type": doctypes.get(docs[index]["document_type"], ""),
}
if doctypes:
metadata["type"] = doctypes.get(docs[index]["document_type"])
chunker.chunk_document(
document=text,
metadata=metadata,
)
to_insert.append((docs[index]["id"],))
c.executemany(
"INSERT INTO indexed_documents (paperless_id) values (?)", to_insert
)
conn.commit()
def chunk_text(texts: list[str], collection):
@@ -99,64 +114,54 @@ def chunk_text(texts: list[str], collection):
def consult_oracle(input: str, collection):
print(input)
import time
chunker = Chunker(collection)
start_time = time.time()
# Ask
# print("Starting query generation")
# qg_start = time.time()
# qg = QueryGenerator()
logging.info("Starting query generation")
qg_start = time.time()
qg = QueryGenerator()
doctype_query = qg.get_doctype_query(input=input)
# metadata_filter = qg.get_query(input)
# qg_end = time.time()
# print(f"Query generation took {qg_end - qg_start:.2f} seconds")
# print(metadata_filter)
metadata_filter = {**doctype_query}
logging.info(metadata_filter)
qg_end = time.time()
logging.info(f"Query generation took {qg_end - qg_start:.2f} seconds")
print("Starting embedding generation")
logging.info("Starting embedding generation")
embedding_start = time.time()
embeddings = Chunker.embedding_fx(input=[input])
embeddings = chunker.embedding_fx(inputs=[input])
embedding_end = time.time()
print(f"Embedding generation took {embedding_end - embedding_start:.2f} seconds")
logging.info(
f"Embedding generation took {embedding_end - embedding_start:.2f} seconds"
)
print("Starting collection query")
logging.info("Starting collection query")
query_start = time.time()
results = collection.query(
query_texts=[input],
query_embeddings=embeddings,
# where=metadata_filter,
where=metadata_filter,
)
print(results)
query_end = time.time()
print(f"Collection query took {query_end - query_start:.2f} seconds")
logging.info(f"Collection query took {query_end - query_start:.2f} seconds")
# Generate
print("Starting LLM generation")
logging.info("Starting LLM generation")
llm_start = time.time()
# output = ollama_client.generate(
# model="gemma3n:e4b",
# prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
# )
response = openai_client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{
"role": "system",
"content": "You are a helpful assistant that understands veterinary terms.",
},
{
"role": "user",
"content": f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
},
],
)
system_prompt = "You are a helpful assistant that understands veterinary terms."
prompt = f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}"
output = llm_client.chat(prompt=prompt, system_prompt=system_prompt)
llm_end = time.time()
print(f"LLM generation took {llm_end - llm_start:.2f} seconds")
logging.info(f"LLM generation took {llm_end - llm_start:.2f} seconds")
total_time = time.time() - start_time
print(f"Total consult_oracle execution took {total_time:.2f} seconds")
logging.info(f"Total consult_oracle execution took {total_time:.2f} seconds")
return response.choices[0].message.content
return output
def paperless_workflow(input):
@@ -175,34 +180,59 @@ def consult_simba_oracle(input: str):
)
def filter_indexed_files(docs):
with sqlite3.connect("visited.db") as conn:
c = conn.cursor()
c.execute(
"CREATE TABLE IF NOT EXISTS indexed_documents (id INTEGER PRIMARY KEY AUTOINCREMENT, paperless_id INTEGER)"
)
c.execute("SELECT paperless_id FROM indexed_documents")
rows = c.fetchall()
conn.commit()
visited = {row[0] for row in rows}
return [doc for doc in docs if doc["id"] not in visited]
if __name__ == "__main__":
args = parser.parse_args()
if args.reindex:
print("Fetching documents from Paperless-NGX")
with sqlite3.connect("./visited.db") as conn:
c = conn.cursor()
c.execute("DELETE FROM indexed_documents")
logging.info("Fetching documents from Paperless-NGX")
ppngx = PaperlessNGXService()
docs = ppngx.get_data()
print(docs)
print(f"Fetched {len(docs)} documents")
#
print("Chunking documents now ...")
chunk_data(docs, collection=simba_docs)
print("Done chunking documents")
# index_using_pdf_llm()
docs = filter_indexed_files(docs)
logging.info(f"Fetched {len(docs)} documents")
if args.index:
with open(args.index) as file:
extension = args.index.split(".")[-1]
# Delete all chromadb data
ids = simba_docs.get(ids=None, limit=None, offset=0)
all_ids = ids["ids"]
if len(all_ids) > 0:
simba_docs.delete(ids=all_ids)
if extension == "pdf":
pdf_path = ppngx.download_pdf_from_id(id=document_id)
image_paths = pdf_to_image(filepath=pdf_path)
print(f"summarizing {file}")
generated_summary = summarize_pdf_image(filepaths=image_paths)
elif extension in [".md", ".txt"]:
chunk_text(texts=[file.readall()], collection=simba_docs)
# Chunk documents
logging.info("Chunking documents now ...")
tag_lookup = ppngx.get_tags()
doctype_lookup = ppngx.get_doctypes()
chunk_data(docs, collection=simba_docs, doctypes=doctype_lookup)
logging.info("Done chunking documents")
# if args.index:
# with open(args.index) as file:
# extension = args.index.split(".")[-1]
# if extension == "pdf":
# pdf_path = ppngx.download_pdf_from_id(id=document_id)
# image_paths = pdf_to_image(filepath=pdf_path)
# print(f"summarizing {file}")
# generated_summary = summarize_pdf_image(filepaths=image_paths)
# elif extension in [".md", ".txt"]:
# chunk_text(texts=[file.readall()], collection=simba_docs)
if args.query:
print("Consulting oracle ...")
logging.info("Consulting oracle ...")
print(
consult_oracle(
input=args.query,
@@ -210,4 +240,4 @@ if __name__ == "__main__":
)
)
else:
print("please provide a query")
logging.info("please provide a query")

View File

@@ -16,4 +16,12 @@ dependencies = [
"pymupdf>=1.24.0",
"black>=25.9.0",
"pillow-heif>=1.1.1",
"flask-jwt-extended>=4.7.1",
"bcrypt>=5.0.0",
"pony>=0.7.19",
"flask-login>=0.6.3",
"quart>=0.20.0",
"tortoise-orm>=0.25.1",
"quart-jwt-extended>=0.1.0",
"pre-commit>=4.3.0",
]

View File

@@ -2,14 +2,16 @@ import json
import os
from typing import Literal
import datetime
from ollama import chat, ChatResponse, Client
from ollama import Client
from openai import OpenAI
from pydantic import BaseModel, Field
# Configure ollama client with URL from environment or default to localhost
ollama_client = Client(host=os.getenv("OLLAMA_URL", "http://localhost:11434"))
ollama_client = Client(
host=os.getenv("OLLAMA_URL", "http://localhost:11434"), timeout=10.0
)
# This uses inferred filters — which means using LLM to create the metadata filters
@@ -38,6 +40,20 @@ class Time(BaseModel):
time: int
DOCTYPE_OPTIONS = [
"Bill",
"Image Description",
"Insurance",
"Medical Record",
"Documentation",
"Letter",
]
class DocumentType(BaseModel):
type: list[str] = Field(description="type of document", enum=DOCTYPE_OPTIONS)
PROMPT = """
You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
a cat, Simba, and its records. The types of records are listed below. Using the query, extract the
@@ -45,7 +61,6 @@ the date range the user is trying to query. You should return it as a JSON. The
If the created_date cannot be ascertained, set it to epoch time start.
You have several operators at your disposal:
- $gt: greater than
- $gte: greater than or equal
@@ -84,6 +99,19 @@ Only return the extracted metadata fields. Make sure the extracted metadata fiel
"""
DOCTYPE_PROMPT = f"""You are an information specialist that processes user queries. A query can have two tags attached from the following options. Based on the query, determine which of the following options is most appropriate: {",".join(DOCTYPE_OPTIONS)}
### Example 1
Query: "Who is Simba's current vet?"
Tags: ["Bill", "Medical Record"]
### Example 2
Query: "Who does Simba know?"
Tags: ["Letter", "Documentation"]
"""
class QueryGenerator:
def __init__(self) -> None:
pass
@@ -101,20 +129,43 @@ class QueryGenerator:
return date.timestamp()
def get_doctype_query(self, input: str):
client = OpenAI()
response = client.chat.completions.create(
messages=[
{
"role": "system",
"content": "You are an information specialist that is really good at deciding what tags a query should have",
},
{"role": "user", "content": DOCTYPE_PROMPT + " " + input},
],
model="gpt-4o",
response_format={
"type": "json_schema",
"json_schema": {
"name": "document_type",
"schema": DocumentType.model_json_schema(),
},
},
)
response_json_str = response.choices[0].message.content
type_data = json.loads(response_json_str)
metadata_query = {"document_type": {"$in": type_data["type"]}}
return metadata_query
def get_query(self, input: str):
client = OpenAI()
print(input)
response = client.responses.parse(
model="gpt-4o",
input=[
{"role": "system", "content": PROMPT},
{"role": "user", "content": input},
],
text_format=Time,
text_format=GeneratedQuery,
)
print(response)
print(response.output)
query = json.loads(response.output_parsed.extracted_metadata_fields)
# response: ChatResponse = ollama_client.chat(
# model="gemma3n:e4b",
# messages=[
@@ -127,17 +178,17 @@ class QueryGenerator:
# query = json.loads(
# json.loads(response["message"]["content"])["extracted_metadata_fields"]
# )
date_key = list(query["created_date"].keys())[0]
query["created_date"][date_key] = self.date_to_epoch(
query["created_date"][date_key]
)
# date_key = list(query["created_date"].keys())[0]
# query["created_date"][date_key] = self.date_to_epoch(
# query["created_date"][date_key]
# )
if "$" not in date_key:
query["created_date"]["$" + date_key] = query["created_date"][date_key]
# if "$" not in date_key:
# query["created_date"]["$" + date_key] = query["created_date"][date_key]
return query
if __name__ == "__main__":
qg = QueryGenerator()
print(qg.get_query("How heavy is Simba?"))
print(qg.get_doctype_query("How heavy is Simba?"))

Binary file not shown.

View File

@@ -1,91 +1,204 @@
import { useState } from "react";
import { useEffect, useState } from "react";
import axios from "axios";
import ReactMarkdown from "react-markdown";
import "./App.css";
type QuestionAnswer = {
question: string;
answer: string;
};
type QuestionBubbleProps = {
text: string;
};
type AnswerBubbleProps = {
text: string;
loading: string;
};
type QuestionAnswerPairProps = {
question: string;
answer: string;
loading: boolean;
};
type Conversation = {
title: string;
id: string;
};
type Message = {
text: string;
speaker: "simba" | "user";
};
type ConversationMenuProps = {
conversations: Conversation[];
};
const ConversationMenu = ({ conversations }: ConversationMenuProps) => {
return (
<div className="absolute bg-white w-md rounded-md shadow-xl m-4 p-4">
<p className="py-2 px-4 rounded-md w-full text-xl font-bold">askSimba!</p>
{conversations.map((conversation) => (
<p className="py-2 px-4 rounded-md hover:bg-stone-200 w-full text-xl font-bold cursor-pointer">
{conversation.title}
</p>
))}
</div>
);
};
const QuestionBubble = ({ text }: QuestionBubbleProps) => {
return <div className="rounded-md bg-stone-200 p-3">🤦: {text}</div>;
};
const AnswerBubble = ({ text, loading }: AnswerBubbleProps) => {
return (
<div className="rounded-md bg-orange-100 p-3">
{loading ? (
<div className="flex flex-col w-full animate-pulse gap-2">
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
</div>
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/3 p-3 rounded-lg" />
<div className="bg-gray-400 w-2/3 p-3 rounded-lg" />
</div>
</div>
) : (
<div className="flex flex-col">
<ReactMarkdown>{"🐈: " + text}</ReactMarkdown>
</div>
)}
</div>
);
};
const QuestionAnswerPair = ({
question,
answer,
loading,
}: QuestionAnswerPairProps) => {
return (
<div className="flex flex-col gap-4">
<QuestionBubble text={question} />
<AnswerBubble text={answer} loading={loading} />
</div>
);
};
const App = () => {
const [query, setQuery] = useState<string>("");
const [answer, setAnswer] = useState<string>("");
const [loading, setLoading] = useState<boolean>(false);
const [simbaMode, setSimbaMode] = useState<boolean>(false);
const [query, setQuery] = useState<string>("");
const [answer, setAnswer] = useState<string>("");
const [simbaMode, setSimbaMode] = useState<boolean>(false);
const [questionsAnswers, setQuestionsAnswers] = useState<QuestionAnswer[]>(
[],
);
const [messages, setMessages] = useState<Message[]>([]);
const [conversations, setConversations] = useState<Conversation[]>([
{ title: "simba meow meow", id: "uuid" },
]);
const simbaAnswers = ["meow.", "hiss...", "purrrrrr", "yowOWROWWowowr"];
const simbaAnswers = ["meow.", "hiss...", "purrrrrr", "yowOWROWWowowr"];
const handleQuestionSubmit = () => {
if (simbaMode) {
console.log("simba mode activated");
setLoading(true);
const randomIndex = Math.floor(Math.random() * simbaAnswers.length);
const randomElement = simbaAnswers[randomIndex];
setAnswer(randomElement);
setTimeout(() => setLoading(false), 3500);
return;
}
const payload = { query: query };
setLoading(true);
axios
.post("/api/query", payload)
.then((result) => setAnswer(result.data.response))
.finally(() => setLoading(false));
};
const handleQueryChange = (event) => {
setQuery(event.target.value);
};
return (
<div className="bg-[url('./simba_cute.jpeg')] bg-cover bg-center bg-no-repeat h-screen bg-opacity-20">
<div className="bg-white/85 h-screen">
<div className="flex flex-row justify-center py-4">
<div className="flex flex-col gap-4 min-w-xl max-w-xl">
<div className="flex flex-row justify-center gap-2 grow">
<h1 className="text-3xl">ask simba!</h1>
</div>
<div className="flex flex-row justify-between gap-2 grow">
<textarea
type="text"
className="p-4 border border-blue-200 rounded-md grow bg-white"
onChange={handleQueryChange}
/>
</div>
<div className="flex flex-row justify-between gap-2 grow">
<button
className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
onClick={() => handleQuestionSubmit()}
type="submit"
>
Submit
</button>
</div>
<div className="flex flex-row justify-center gap-2 grow">
<input
type="checkbox"
onChange={(event) =>
setSimbaMode(event.target.checked)
}
/>
<p>simba mode?</p>
</div>
{loading ? (
<div className="flex flex-col w-full animate-pulse gap-2">
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
</div>
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/3 p-3 rounded-lg" />
<div className="bg-gray-400 w-2/3 p-3 rounded-lg" />
</div>
</div>
) : (
<div className="flex flex-col">
<ReactMarkdown>{answer}</ReactMarkdown>
</div>
)}
</div>
</div>
</div>
</div>
);
useEffect(() => {
axios.get("/api/messages").then((result) => {
setMessages(
result.data.messages.map((message) => {
return {
text: message.text,
speaker: message.speaker,
};
}),
);
});
}, []);
const handleQuestionSubmit = () => {
let currMessages = messages.concat([{ text: query, speaker: "user" }]);
setMessages(currMessages);
if (simbaMode) {
console.log("simba mode activated");
const randomIndex = Math.floor(Math.random() * simbaAnswers.length);
const randomElement = simbaAnswers[randomIndex];
setAnswer(randomElement);
setQuestionsAnswers(
questionsAnswers.concat([
{
question: query,
answer: randomElement,
},
]),
);
return;
}
const payload = { query: query };
axios.post("/api/query", payload).then((result) => {
setQuestionsAnswers(
questionsAnswers.concat([
{ question: query, answer: result.data.response },
]),
);
setMessages(
currMessages.concat([{ text: result.data.response, speaker: "simba" }]),
);
});
};
const handleQueryChange = (event) => {
setQuery(event.target.value);
};
return (
<div className="h-screen bg-opacity-20">
<div className="bg-white/85 h-screen">
<div className="flex flex-row justify-center py-4">
<div className="flex flex-col gap-4 min-w-xl max-w-xl">
<header className="flex flex-row justify-center gap-2 grow sticky top-0 z-10 bg-white">
<h1 className="text-3xl">ask simba!</h1>
</header>
{/*{questionsAnswers.map((qa) => (
<QuestionAnswerPair question={qa.question} answer={qa.answer} />
))}*/}
{messages.map((msg) => {
if (msg.speaker == "simba") {
return <AnswerBubble text={msg.text} loading="" />;
}
return <QuestionBubble text={msg.text} />;
})}
<footer className="flex flex-col gap-2 sticky bottom-0">
<div className="flex flex-row justify-between gap-2 grow">
<textarea
type="text"
className="p-4 border border-blue-200 rounded-md grow bg-white"
onChange={handleQueryChange}
/>
</div>
<div className="flex flex-row justify-between gap-2 grow">
<button
className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
onClick={() => handleQuestionSubmit()}
type="submit"
>
Submit
</button>
</div>
<div className="flex flex-row justify-center gap-2 grow">
<input
type="checkbox"
onChange={(event) => setSimbaMode(event.target.checked)}
/>
<p>simba mode?</p>
</div>
</footer>
</div>
</div>
</div>
</div>
);
};
export default App;

View File

@@ -1,23 +1,35 @@
import os
import tempfile
import httpx
import logging
from dotenv import load_dotenv
load_dotenv()
logging.basicConfig(level=logging.INFO)
class PaperlessNGXService:
def __init__(self):
self.base_url = os.getenv("BASE_URL")
self.token = os.getenv("PAPERLESS_TOKEN")
self.url = f"http://{os.getenv('BASE_URL')}/api/documents/?query=simba"
self.url = f"http://{os.getenv('BASE_URL')}/api/documents/?tags__id=8"
self.headers = {"Authorization": f"Token {os.getenv('PAPERLESS_TOKEN')}"}
def get_data(self):
print(f"Getting data from: {self.url}")
r = httpx.get(self.url, headers=self.headers)
return r.json()["results"]
results = r.json()["results"]
nextLink = r.json().get("next")
while nextLink:
r = httpx.get(nextLink, headers=self.headers)
results += r.json()["results"]
nextLink = r.json().get("next")
return results
def get_doc_by_id(self, doc_id: int):
url = f"http://{os.getenv('BASE_URL')}/api/documents/{doc_id}/"
@@ -45,17 +57,29 @@ class PaperlessNGXService:
def upload_description(self, description_filepath, file, title, exif_date: str):
POST_URL = f"http://{os.getenv('BASE_URL')}/api/documents/post_document/"
files = {'document': ('description_filepath', file, 'application/txt')}
files = {"document": ("description_filepath", file, "application/txt")}
data = {
"title": title,
"create": exif_date,
"document_type": 3
"tags": [7]
"title": title,
"create": exif_date,
"document_type": 3,
"tags": [7],
}
r= httpx.post(POST_URL, headers=self.headers, data=data, files=files)
r = httpx.post(POST_URL, headers=self.headers, data=data, files=files)
r.raise_for_status()
def get_tags(self):
GET_URL = f"http://{os.getenv('BASE_URL')}/api/tags/"
r = httpx.get(GET_URL, headers=self.headers)
data = r.json()
return {tag["id"]: tag["name"] for tag in data["results"]}
def get_doctypes(self):
GET_URL = f"http://{os.getenv('BASE_URL')}/api/document_types/"
r = httpx.get(GET_URL, headers=self.headers)
data = r.json()
return {doctype["id"]: doctype["name"] for doctype in data["results"]}
if __name__ == "__main__":
pp = PaperlessNGXService()

291
uv.lock generated
View File

@@ -2,6 +2,27 @@ version = 1
revision = 2
requires-python = ">=3.13"
[[package]]
name = "aiofiles"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" },
]
[[package]]
name = "aiosqlite"
version = "0.21.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454, upload-time = "2025-02-03T07:30:16.235Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" },
]
[[package]]
name = "annotated-types"
version = "0.7.0"
@@ -170,6 +191,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" },
]
[[package]]
name = "cfgv"
version = "3.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.3"
@@ -276,6 +306,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" },
]
[[package]]
name = "distlib"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
]
[[package]]
name = "distro"
version = "1.9.0"
@@ -320,6 +359,33 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" },
]
[[package]]
name = "flask-jwt-extended"
version = "4.7.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "flask" },
{ name = "pyjwt" },
{ name = "werkzeug" },
]
sdist = { url = "https://files.pythonhosted.org/packages/51/16/96b101f18cba17ecce3225ab07bc4c8f23e6befd8552dbbed87482e7c7fb/flask_jwt_extended-4.7.1.tar.gz", hash = "sha256:8085d6757505b6f3291a2638c84d207e8f0ad0de662d1f46aa2f77e658a0c976", size = 34411, upload-time = "2024-11-20T23:44:41.044Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/67/34/9a91da47b1565811ab4aa5fb134632c8d1757960bfa7d457f486947c4d75/Flask_JWT_Extended-4.7.1-py2.py3-none-any.whl", hash = "sha256:52f35bf0985354d7fb7b876e2eb0e0b141aaff865a22ff6cc33d9a18aa987978", size = 22588, upload-time = "2024-11-20T23:44:39.435Z" },
]
[[package]]
name = "flask-login"
version = "0.6.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "flask" },
{ name = "werkzeug" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c3/6e/2f4e13e373bb49e68c02c51ceadd22d172715a06716f9299d9df01b6ddb2/Flask-Login-0.6.3.tar.gz", hash = "sha256:5e23d14a607ef12806c699590b89d0f0e0d67baeec599d75947bf9c147330333", size = 48834, upload-time = "2023-10-30T14:53:21.151Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/59/f5/67e9cc5c2036f58115f9fe0f00d203cf6780c3ff8ae0e705e7a9d9e8ff9e/Flask_Login-0.6.3-py3-none-any.whl", hash = "sha256:849b25b82a436bf830a054e74214074af59097171562ab10bfa999e6b78aae5d", size = 17303, upload-time = "2023-10-30T14:53:19.636Z" },
]
[[package]]
name = "flatbuffers"
version = "25.9.23"
@@ -404,6 +470,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
[[package]]
name = "h2"
version = "4.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "hpack" },
{ name = "hyperframe" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
]
[[package]]
name = "hf-xet"
version = "1.1.10"
@@ -419,6 +498,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ee/0e/471f0a21db36e71a2f1752767ad77e92d8cde24e974e03d662931b1305ec/hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045", size = 2804691, upload-time = "2025-09-12T20:10:28.433Z" },
]
[[package]]
name = "hpack"
version = "4.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
@@ -493,6 +581,39 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" },
]
[[package]]
name = "hypercorn"
version = "0.17.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "h11" },
{ name = "h2" },
{ name = "priority" },
{ name = "wsproto" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7e/3a/df6c27642e0dcb7aff688ca4be982f0fb5d89f2afd3096dc75347c16140f/hypercorn-0.17.3.tar.gz", hash = "sha256:1b37802ee3ac52d2d85270700d565787ab16cf19e1462ccfa9f089ca17574165", size = 44409, upload-time = "2024-05-28T20:55:53.06Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0e/3b/dfa13a8d96aa24e40ea74a975a9906cfdc2ab2f4e3b498862a57052f04eb/hypercorn-0.17.3-py3-none-any.whl", hash = "sha256:059215dec34537f9d40a69258d323f56344805efb462959e727152b0aa504547", size = 61742, upload-time = "2024-05-28T20:55:48.829Z" },
]
[[package]]
name = "hyperframe"
version = "6.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" },
]
[[package]]
name = "identify"
version = "2.6.15"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311, upload-time = "2025-10-02T17:43:40.631Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183, upload-time = "2025-10-02T17:43:39.137Z" },
]
[[package]]
name = "idna"
version = "3.10"
@@ -523,6 +644,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" },
]
[[package]]
name = "iso8601"
version = "2.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b9/f3/ef59cee614d5e0accf6fd0cbba025b93b272e626ca89fb70a3e9187c5d15/iso8601-2.1.0.tar.gz", hash = "sha256:6b1d3829ee8921c4301998c909f7829fa9ed3cbdac0d3b16af2d743aed1ba8df", size = 6522, upload-time = "2023-10-03T00:25:39.317Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/0c/f37b6a241f0759b7653ffa7213889d89ad49a2b76eb2ddf3b57b2738c347/iso8601-2.1.0-py3-none-any.whl", hash = "sha256:aac4145c4dcb66ad8b648a02830f5e2ff6c24af20f4f482689be402db2429242", size = 7545, upload-time = "2023-10-03T00:25:32.304Z" },
]
[[package]]
name = "itsdangerous"
version = "2.2.0"
@@ -783,6 +913,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
]
[[package]]
name = "nodeenv"
version = "1.9.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" },
]
[[package]]
name = "numpy"
version = "2.3.3"
@@ -1130,6 +1269,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" },
]
[[package]]
name = "pony"
version = "0.7.19"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/60/59/ab6542afa95de0d5a16545ce6ce683960352cfd9a2318722593b81a98123/pony-0.7.19.tar.gz", hash = "sha256:f7f83b2981893e49f7f18e8def52ad8fa8f8e6c5f9583b9aaed62d4d85036a0f", size = 258589, upload-time = "2024-08-27T12:29:29.963Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/16/cb/0ef8429024309fe6f5edf1debc7cf63adeaeb34b2242490cc18710658abd/pony-0.7.19-py3-none-any.whl", hash = "sha256:5112b4cf40d3f24e93ae66dc5ab7dc6813388efa870e750928d60dc699873cf5", size = 317259, upload-time = "2024-08-27T12:29:28.247Z" },
]
[[package]]
name = "posthog"
version = "5.4.0"
@@ -1146,6 +1294,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4f/98/e480cab9a08d1c09b1c59a93dade92c1bb7544826684ff2acbfd10fcfbd4/posthog-5.4.0-py3-none-any.whl", hash = "sha256:284dfa302f64353484420b52d4ad81ff5c2c2d1d607c4e2db602ac72761831bd", size = 105364, upload-time = "2025-06-20T23:19:22.001Z" },
]
[[package]]
name = "pre-commit"
version = "4.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cfgv" },
{ name = "identify" },
{ name = "nodeenv" },
{ name = "pyyaml" },
{ name = "virtualenv" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792, upload-time = "2025-08-09T18:56:14.651Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" },
]
[[package]]
name = "priority"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f5/3c/eb7c35f4dcede96fca1842dac5f4f5d15511aa4b52f3a961219e68ae9204/priority-2.0.0.tar.gz", hash = "sha256:c965d54f1b8d0d0b19479db3924c7c36cf672dbf2aec92d43fbdaf4492ba18c0", size = 24792, upload-time = "2021-06-27T10:15:05.487Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/5f/82c8074f7e84978129347c2c6ec8b6c59f3584ff1a20bc3c940a3e061790/priority-2.0.0-py3-none-any.whl", hash = "sha256:6f8eefce5f3ad59baf2c080a664037bb4725cd0a790d53d59ab4059288faf6aa", size = 8946, upload-time = "2021-06-27T10:15:03.856Z" },
]
[[package]]
name = "protobuf"
version = "6.32.1"
@@ -1325,6 +1498,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
[[package]]
name = "pyjwt"
version = "2.10.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" },
]
[[package]]
name = "pymupdf"
version = "1.26.4"
@@ -1346,6 +1528,15 @@ version = "0.48.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/c7/2c/94ed7b91db81d61d7096ac8f2d325ec562fc75e35f3baea8749c85b28784/PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378", size = 67259, upload-time = "2022-03-15T11:22:57.066Z" }
[[package]]
name = "pypika-tortoise"
version = "0.6.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1a/7b/0a31165e22e599ba149ba35d4323d343205a70d91a4f6e8c6565f5b4fa08/pypika_tortoise-0.6.2.tar.gz", hash = "sha256:f95ab59d9b6454db2e8daa0934728458350a1f3d56e81d9d1debc8eebeff26b3", size = 80522, upload-time = "2025-09-02T03:56:33.986Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/67/cf/2d47236c80d6deea85e76c86b959f0ec24369c16db691c6266f7a20ff4bd/pypika_tortoise-0.6.2-py3-none-any.whl", hash = "sha256:425462b02ede0a5ed7b812ec12427419927ed6b19282c55667d1cbc9a440d3cb", size = 46919, upload-time = "2025-09-02T03:56:32.771Z" },
]
[[package]]
name = "pyproject-hooks"
version = "1.2.0"
@@ -1394,6 +1585,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/60/e5/63bed382f6a7a5ba70e7e132b8b7b8abbcf4888ffa6be4877698dcfbed7d/pytokens-0.1.10-py3-none-any.whl", hash = "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b", size = 12046, upload-time = "2025-02-19T14:51:18.694Z" },
]
[[package]]
name = "pytz"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
]
[[package]]
name = "pyyaml"
version = "6.0.3"
@@ -1430,37 +1630,87 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]
[[package]]
name = "quart"
version = "0.20.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiofiles" },
{ name = "blinker" },
{ name = "click" },
{ name = "flask" },
{ name = "hypercorn" },
{ name = "itsdangerous" },
{ name = "jinja2" },
{ name = "markupsafe" },
{ name = "werkzeug" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1d/9d/12e1143a5bd2ccc05c293a6f5ae1df8fd94a8fc1440ecc6c344b2b30ce13/quart-0.20.0.tar.gz", hash = "sha256:08793c206ff832483586f5ae47018c7e40bdd75d886fee3fabbdaa70c2cf505d", size = 63874, upload-time = "2024-12-23T13:53:05.664Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/e9/cc28f21f52913adf333f653b9e0a3bf9cb223f5083a26422968ba73edd8d/quart-0.20.0-py3-none-any.whl", hash = "sha256:003c08f551746710acb757de49d9b768986fd431517d0eb127380b656b98b8f1", size = 77960, upload-time = "2024-12-23T13:53:02.842Z" },
]
[[package]]
name = "quart-jwt-extended"
version = "0.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyjwt" },
{ name = "quart" },
{ name = "six" },
{ name = "werkzeug" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/12/e7b37b6dad958470248041f3626ea039337e258cec33ce25e3f316329791/Quart_JWT_Extended-0.1.0-py3-none-any.whl", hash = "sha256:422f04f317a76dc614a55ce01c945534e28a30c4e6e09e746f11f160a618a9f7", size = 22652, upload-time = "2022-10-03T12:53:05.451Z" },
]
[[package]]
name = "raggr"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "bcrypt" },
{ name = "black" },
{ name = "chromadb" },
{ name = "flask" },
{ name = "flask-jwt-extended" },
{ name = "flask-login" },
{ name = "httpx" },
{ name = "ollama" },
{ name = "openai" },
{ name = "pillow" },
{ name = "pillow-heif" },
{ name = "pony" },
{ name = "pre-commit" },
{ name = "pydantic" },
{ name = "pymupdf" },
{ name = "python-dotenv" },
{ name = "quart" },
{ name = "quart-jwt-extended" },
{ name = "tortoise-orm" },
]
[package.metadata]
requires-dist = [
{ name = "bcrypt", specifier = ">=5.0.0" },
{ name = "black", specifier = ">=25.9.0" },
{ name = "chromadb", specifier = ">=1.1.0" },
{ name = "flask", specifier = ">=3.1.2" },
{ name = "flask-jwt-extended", specifier = ">=4.7.1" },
{ name = "flask-login", specifier = ">=0.6.3" },
{ name = "httpx", specifier = ">=0.28.1" },
{ name = "ollama", specifier = ">=0.6.0" },
{ name = "openai", specifier = ">=2.0.1" },
{ name = "pillow", specifier = ">=10.0.0" },
{ name = "pillow-heif", specifier = ">=1.1.1" },
{ name = "pony", specifier = ">=0.7.19" },
{ name = "pre-commit", specifier = ">=4.3.0" },
{ name = "pydantic", specifier = ">=2.11.9" },
{ name = "pymupdf", specifier = ">=1.24.0" },
{ name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "quart", specifier = ">=0.20.0" },
{ name = "quart-jwt-extended", specifier = ">=0.1.0" },
{ name = "tortoise-orm", specifier = ">=0.25.1" },
]
[[package]]
@@ -1668,6 +1918,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/46/e33a8c93907b631a99377ef4c5f817ab453d0b34f93529421f42ff559671/tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = "sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138", size = 2674684, upload-time = "2025-09-19T09:49:24.953Z" },
]
[[package]]
name = "tortoise-orm"
version = "0.25.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiosqlite" },
{ name = "iso8601", marker = "python_full_version < '4.0'" },
{ name = "pypika-tortoise", marker = "python_full_version < '4.0'" },
{ name = "pytz" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d7/9b/de966810021fa773fead258efd8deea2bb73bb12479e27f288bd8ceb8763/tortoise_orm-0.25.1.tar.gz", hash = "sha256:4d5bfd13d5750935ffe636a6b25597c5c8f51c47e5b72d7509d712eda1a239fe", size = 128341, upload-time = "2025-06-05T10:43:31.058Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/55/2bda7f4445f4c07b734385b46d1647a388d05160cf5b8714a713e8709378/tortoise_orm-0.25.1-py3-none-any.whl", hash = "sha256:df0ef7e06eb0650a7e5074399a51ee6e532043308c612db2cac3882486a3fd9f", size = 167723, upload-time = "2025-06-05T10:43:29.309Z" },
]
[[package]]
name = "tqdm"
version = "4.67.1"
@@ -1763,6 +2028,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/63/9a/0962b05b308494e3202d3f794a6e85abe471fe3cafdbcf95c2e8c713aabd/uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553", size = 4660018, upload-time = "2024-10-14T23:38:10.888Z" },
]
[[package]]
name = "virtualenv"
version = "20.35.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "distlib" },
{ name = "filelock" },
{ name = "platformdirs" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a4/d5/b0ccd381d55c8f45d46f77df6ae59fbc23d19e901e2d523395598e5f4c93/virtualenv-20.35.3.tar.gz", hash = "sha256:4f1a845d131133bdff10590489610c98c168ff99dc75d6c96853801f7f67af44", size = 6002907, upload-time = "2025-10-10T21:23:33.178Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/27/73/d9a94da0e9d470a543c1b9d3ccbceb0f59455983088e727b8a1824ed90fb/virtualenv-20.35.3-py3-none-any.whl", hash = "sha256:63d106565078d8c8d0b206d48080f938a8b25361e19432d2c9db40d2899c810a", size = 5981061, upload-time = "2025-10-10T21:23:30.433Z" },
]
[[package]]
name = "watchfiles"
version = "1.1.0"
@@ -1858,6 +2137,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498, upload-time = "2024-11-08T15:52:16.132Z" },
]
[[package]]
name = "wsproto"
version = "1.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425, upload-time = "2022-08-23T19:58:21.447Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226, upload-time = "2022-08-23T19:58:19.96Z" },
]
[[package]]
name = "zipp"
version = "3.23.0"