adding image processing pipeline immich -> paperless

This commit is contained in:
2025-10-04 08:54:10 -04:00
parent 24b30bc8a3
commit 0bb3e3172b
11 changed files with 380 additions and 78 deletions

21
app.py
View File

@@ -4,34 +4,41 @@ from flask import Flask, request, jsonify, render_template, send_from_directory
from main import consult_simba_oracle from main import consult_simba_oracle
app = Flask(__name__, static_folder="raggr-frontend/dist/static", template_folder="raggr-frontend/dist") app = Flask(
__name__,
static_folder="raggr-frontend/dist/static",
template_folder="raggr-frontend/dist",
)
# Serve React static files # Serve React static files
@app.route('/static/<path:filename>') @app.route("/static/<path:filename>")
def static_files(filename): def static_files(filename):
return send_from_directory(app.static_folder, filename) return send_from_directory(app.static_folder, filename)
# Serve the React app for all routes (catch-all) # Serve the React app for all routes (catch-all)
@app.route('/', defaults={'path': ''}) @app.route("/", defaults={"path": ""})
@app.route('/<path:path>') @app.route("/<path:path>")
def serve_react_app(path): def serve_react_app(path):
if path and os.path.exists(os.path.join(app.template_folder, path)): if path and os.path.exists(os.path.join(app.template_folder, path)):
return send_from_directory(app.template_folder, path) return send_from_directory(app.template_folder, path)
return render_template('index.html') return render_template("index.html")
@app.route("/api/query", methods=["POST"]) @app.route("/api/query", methods=["POST"])
def query(): def query():
data = request.get_json() data = request.get_json()
query = data.get("query") query = data.get("query")
return jsonify({"response": consult_simba_oracle(query)}) return jsonify({"response": consult_simba_oracle(query)})
@app.route("/api/ingest", methods=["POST"]) @app.route("/api/ingest", methods=["POST"])
def webhook(): def webhook():
data = request.get_json() data = request.get_json()
print(data) print(data)
return jsonify({"status": "received"}) return jsonify({"status": "received"})
if __name__ == "__main__": if __name__ == "__main__":
app.run(host="0.0.0.0", port=8080, debug=True) app.run(host="0.0.0.0", port=8080, debug=True)

81
image_process.py Normal file
View File

@@ -0,0 +1,81 @@
from ollama import Client
import argparse
import os
import logging
from PIL import Image, ExifTags
from pillow_heif import register_heif_opener
from pydantic import BaseModel
from dotenv import load_dotenv
# Load variables from a local .env file so the os.getenv lookup below
# (OLLAMA_HOST) can see them.
load_dotenv()
# Register the HEIF/HEIC plugin with Pillow so Image.open can read those files.
register_heif_opener()
# Show INFO-level progress messages emitted by this module.
logging.basicConfig(level=logging.INFO)
# Command-line interface: one required positional argument naming the image
# that should be described.
parser = argparse.ArgumentParser(
    prog="SimbaImageProcessor",
    description=(
        "Describe a photo of Simba with an Ollama vision model, using the "
        "image's EXIF metadata as extra context."
    ),
    epilog="HEIC/HEIF images are converted to JPEG before being sent to the model.",
)
parser.add_argument("filepath", help="path to the image file to describe")
# Module-wide Ollama client. The server address is read from OLLAMA_HOST and
# falls back to http://localhost:11434 when the variable is unset.
client = Client(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
class SimbaImageDescription(BaseModel):
    """Structured reply requested from the model when describing a photo.

    Its JSON schema is passed to Ollama as the response format, and the
    model's reply is validated against it.
    """

    # Date of the photo as reported by the model. Plain string; no particular
    # format is enforced here.
    image_date: str
    # The model's notes about what is visible in the image.
    description: str
def _read_exif(img):
    """Return the image's EXIF tags keyed by their human-readable names."""
    exif = {
        ExifTags.TAGS[tag_id]: value
        for tag_id, value in img.getexif().items()
        if tag_id in ExifTags.TAGS
    }
    # MakerNote is dropped before the tags are embedded in the prompt --
    # presumably because it is a large vendor-specific blob; confirm if needed.
    exif.pop("MakerNote", None)
    return exif


def describe_simba_image(input):
    """Ask the Ollama vision model to describe a photo of Simba.

    HEIC/HEIF files (detected by file extension, case-insensitively) are first
    converted to a JPEG written next to the source file, and that JPEG is what
    gets sent to the model. EXIF metadata is always read from the original
    file and embedded in the prompt.

    Args:
        input: Path to the image file, as a string.

    Returns:
        A ``SimbaImageDescription`` parsed from the model's JSON reply.

    Raises:
        pydantic.ValidationError: If the reply does not match the schema.
    """
    logging.info("Opening image of Simba ...")
    image_path = input
    # splitext only looks at the final extension, so dots elsewhere in the
    # path (e.g. "./photos/cat.heic") do not corrupt the output name.
    root, extension = os.path.splitext(input)
    needs_conversion = extension.lower() in (".heic", ".heif")

    with Image.open(input) as img:
        logging.info("Extracting EXIF...")
        exif = _read_exif(img)
        if needs_conversion:
            image_path = root + ".jpg"
            # JPEG cannot store alpha or palette data; normalise such images
            # to RGB so the save does not fail.
            jpeg_ready = img if img.mode in ("RGB", "L", "CMYK") else img.convert("RGB")
            jpeg_ready.save(image_path, "JPEG")

    logging.info(exif)

    prompt = f"Simba is an orange cat belonging to Ryan Chen. In 2025, they lived in New York. In 2024, they lived in California. Analyze the following image and tell me what Simba seems to be doing. Be extremely descriptive about Simba, things in the background, and the setting of the image. I will also include the EXIF data of the image, please use it to help you determine information about Simba. EXIF: {exif}. Put the notes in the description field and the date in the image_date field."

    logging.info("Sending info to Ollama ...")
    response = client.chat(
        model="gemma3:4b",
        messages=[
            {
                "role": "system",
                "content": "you are a very shrewd and descriptive note taker. all of your responses will be formatted like notes in bullet points. be very descriptive. do not leave a single thing out.",
            },
            {"role": "user", "content": prompt, "images": [image_path]},
        ],
        # Constrain the reply to the SimbaImageDescription JSON schema.
        format=SimbaImageDescription.model_json_schema(),
    )

    return SimbaImageDescription.model_validate_json(response["message"]["content"])
if __name__ == "__main__":
    # Script entry point: describe the image named on the command line and
    # log the structured result so the run produces visible output.
    args = parser.parse_args()
    if args.filepath:
        result = describe_simba_image(input=args.filepath)
        logging.info(result)

98
index_immich.py Normal file
View File

@@ -0,0 +1,98 @@
import httpx
import os
from pathlib import Path
import logging
import tempfile
from image_process import describe_simba_image
from request import PaperlessNGXService
logging.basicConfig(level=logging.INFO)
from dotenv import load_dotenv
load_dotenv()
# Configuration from environment variables
IMMICH_URL = os.getenv("IMMICH_URL", "http://localhost:2283")
API_KEY = os.getenv("IMMICH_API_KEY")
PERSON_NAME = os.getenv("PERSON_NAME", "Simba") # Name of the tagged person/pet
DOWNLOAD_DIR = os.getenv("DOWNLOAD_DIR", "./simba_photos")
# Set up headers
headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}
def _search_image_assets(query):
    """Run one Immich smart search and return its image hits.

    Only the first page of results is read.

    Args:
        query: Free-text smart-search query.

    Returns:
        Dict mapping asset id to the asset's ``originalFileName`` (which may
        be ``None``), in the order the server returned the assets.
    """
    response = httpx.post(
        f"{IMMICH_URL}/api/search/smart", headers=headers, json={"query": query}
    )
    response.raise_for_status()
    assets = response.json()["assets"]
    logging.info("Smart search %r matched %s assets", query, assets.get("total"))
    return {
        asset["id"]: asset.get("originalFileName")
        for asset in assets["items"]
        if asset["type"] == "IMAGE"
    }


def _download_original(asset_id, destination):
    """Stream the original file of an Immich asset to ``destination``."""
    url = f"{IMMICH_URL}/api/assets/{asset_id}/original"
    with httpx.stream("GET", url, headers=headers) as response:
        response.raise_for_status()
        # The file is closed (and therefore flushed) before anyone reads it.
        with open(destination, "wb") as image_file:
            for chunk in response.iter_bytes(chunk_size=8192):
                image_file.write(chunk)


def main():
    """Fetch one Simba photo from Immich, describe it, and upload the notes.

    Steps: search Immich for candidate images, download one original into
    ``DOWNLOAD_DIR``, have the vision model describe it, write the description
    to a text file, upload that file to Paperless-ngx, then delete the text
    file. The downloaded image is left in place.
    """
    if not API_KEY:
        raise SystemExit("IMMICH_API_KEY is not set")

    ppngx = PaperlessNGXService()

    # Merge the hits of both searches; later queries overwrite duplicate ids
    # without changing their position.
    ids = {}
    for query in ("orange cat", "simba"):
        ids.update(_search_image_assets(query))

    # Only a single asset is processed for now: the second hit overall.
    asset_index = 1
    asset_ids = list(ids)
    if len(asset_ids) <= asset_index:
        raise SystemExit(
            f"Expected at least {asset_index + 1} image assets, found {len(asset_ids)}"
        )
    immich_asset_id = asset_ids[asset_index]
    # Fall back to the asset id when the server gives no file name, and keep
    # only the final path component so the name cannot escape download_dir.
    immich_filename = Path(ids[immich_asset_id] or immich_asset_id).name

    download_dir = Path(DOWNLOAD_DIR).resolve()
    download_dir.mkdir(parents=True, exist_ok=True)
    image_path = str(download_dir / immich_filename)
    _download_original(immich_asset_id, image_path)

    logging.info("Processing image ...")
    description = describe_simba_image(image_path)

    title = "SIMBA_DESCRIBE_001.txt"
    description_filepath = str(download_dir / title)
    try:
        with open(description_filepath, "w", encoding="utf-8") as text_file:
            text_file.write(description.description)
        with open(description_filepath, "rb") as upload_file:
            ppngx.upload_description(
                description_filepath=description_filepath,
                file=upload_file,
                title=title,
                exif_date=description.image_date,
            )
        logging.info("Processing complete. Deleting file.")
    finally:
        # Remove the temporary description file even if the upload failed.
        Path(description_filepath).unlink(missing_ok=True)


if __name__ == "__main__":
    main()

43
main.py
View File

@@ -33,14 +33,13 @@ parser.add_argument("query", type=str, help="questions about simba's health")
parser.add_argument( parser.add_argument(
"--reindex", action="store_true", help="re-index the simba documents" "--reindex", action="store_true", help="re-index the simba documents"
) )
parser.add_argument( parser.add_argument("--index", help="index a file")
"--index", help="index a file"
)
ppngx = PaperlessNGXService() ppngx = PaperlessNGXService()
openai_client = OpenAI() openai_client = OpenAI()
def index_using_pdf_llm(): def index_using_pdf_llm():
files = ppngx.get_data() files = ppngx.get_data()
for file in files: for file in files:
@@ -79,14 +78,15 @@ def chunk_data(docs: list[dict[str, Union[str, Any]]], collection):
for index, text in enumerate(texts): for index, text in enumerate(texts):
print(docs[index]["original_file_name"]) print(docs[index]["original_file_name"])
metadata = { metadata = {
"created_date": date_to_epoch(docs[index]["created_date"]), "created_date": date_to_epoch(docs[index]["created_date"]),
"filename": docs[index]["original_file_name"] "filename": docs[index]["original_file_name"],
} }
chunker.chunk_document( chunker.chunk_document(
document=text, document=text,
metadata=metadata, metadata=metadata,
) )
def chunk_text(texts: list[str], collection): def chunk_text(texts: list[str], collection):
chunker = Chunker(collection) chunker = Chunker(collection)
@@ -97,9 +97,11 @@ def chunk_text(texts: list[str], collection):
metadata=metadata, metadata=metadata,
) )
def consult_oracle(input: str, collection): def consult_oracle(input: str, collection):
print(input) print(input)
import time import time
start_time = time.time() start_time = time.time()
# Ask # Ask
@@ -122,7 +124,7 @@ def consult_oracle(input: str, collection):
results = collection.query( results = collection.query(
query_texts=[input], query_texts=[input],
query_embeddings=embeddings, query_embeddings=embeddings,
#where=metadata_filter, # where=metadata_filter,
) )
print(results) print(results)
query_end = time.time() query_end = time.time()
@@ -132,15 +134,21 @@ def consult_oracle(input: str, collection):
print("Starting LLM generation") print("Starting LLM generation")
llm_start = time.time() llm_start = time.time()
# output = ollama_client.generate( # output = ollama_client.generate(
# model="gemma3n:e4b", # model="gemma3n:e4b",
# prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}", # prompt=f"You are a helpful assistant that understandings veterinary terms. Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
# ) # )
response = openai_client.chat.completions.create( response = openai_client.chat.completions.create(
model="gpt-4o-mini", model="gpt-4o-mini",
messages=[ messages=[
{"role": "system", "content": "You are a helpful assistant that understands veterinary terms."}, {
{"role": "user", "content": f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}"} "role": "system",
] "content": "You are a helpful assistant that understands veterinary terms.",
},
{
"role": "user",
"content": f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
},
],
) )
llm_end = time.time() llm_end = time.time()
print(f"LLM generation took {llm_end - llm_start:.2f} seconds") print(f"LLM generation took {llm_end - llm_start:.2f} seconds")
@@ -181,7 +189,6 @@ if __name__ == "__main__":
print("Done chunking documents") print("Done chunking documents")
# index_using_pdf_llm() # index_using_pdf_llm()
if args.index: if args.index:
with open(args.index) as file: with open(args.index) as file:
extension = args.index.split(".")[-1] extension = args.index.split(".")[-1]
@@ -196,11 +203,11 @@ if __name__ == "__main__":
if args.query: if args.query:
print("Consulting oracle ...") print("Consulting oracle ...")
print(consult_oracle( print(
input=args.query, consult_oracle(
collection=simba_docs, input=args.query,
)) collection=simba_docs,
)
)
else: else:
print("please provide a query") print("please provide a query")

View File

@@ -14,4 +14,6 @@ dependencies = [
"pydantic>=2.11.9", "pydantic>=2.11.9",
"pillow>=10.0.0", "pillow>=10.0.0",
"pymupdf>=1.24.0", "pymupdf>=1.24.0",
"black>=25.9.0",
"pillow-heif>=1.1.1",
] ]

View File

@@ -33,9 +33,11 @@ class GeneratedQuery(BaseModel):
fields: list[str] fields: list[str]
extracted_metadata_fields: str extracted_metadata_fields: str
class Time(BaseModel): class Time(BaseModel):
time: int time: int
PROMPT = """ PROMPT = """
You are an information specialist that processes user queries. The current year is 2025. The user queries are all about You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
a cat, Simba, and its records. The types of records are listed below. Using the query, extract the a cat, Simba, and its records. The types of records are listed below. Using the query, extract the
@@ -114,16 +116,16 @@ class QueryGenerator:
query = json.loads(response.output_parsed.extracted_metadata_fields) query = json.loads(response.output_parsed.extracted_metadata_fields)
# response: ChatResponse = ollama_client.chat( # response: ChatResponse = ollama_client.chat(
# model="gemma3n:e4b", # model="gemma3n:e4b",
# messages=[ # messages=[
# {"role": "system", "content": PROMPT}, # {"role": "system", "content": PROMPT},
# {"role": "user", "content": input}, # {"role": "user", "content": input},
# ], # ],
# format=GeneratedQuery.model_json_schema(), # format=GeneratedQuery.model_json_schema(),
# ) # )
# query = json.loads( # query = json.loads(
# json.loads(response["message"]["content"])["extracted_metadata_fields"] # json.loads(response["message"]["content"])["extracted_metadata_fields"]
# ) # )
date_key = list(query["created_date"].keys())[0] date_key = list(query["created_date"].keys())[0]
query["created_date"][date_key] = self.date_to_epoch( query["created_date"][date_key] = self.date_to_epoch(

View File

@@ -33,50 +33,56 @@ const App = () => {
setQuery(event.target.value); setQuery(event.target.value);
}; };
return ( return (
<div className="flex flex-row justify-center py-4"> <div className="bg-[url('./simba_cute.jpeg')] bg-cover bg-center bg-no-repeat h-screen bg-opacity-20">
<div className="flex flex-col gap-4 min-w-xl max-w-xl"> <div className="bg-white/85 h-screen">
<div className="flex flex-row justify-center gap-2 grow"> <div className="flex flex-row justify-center py-4">
<h1 className="text-3xl">ask simba!</h1> <div className="flex flex-col gap-4 min-w-xl max-w-xl">
</div> <div className="flex flex-row justify-center gap-2 grow">
<div className="flex flex-row justify-between gap-2 grow"> <h1 className="text-3xl">ask simba!</h1>
<textarea
type="text"
className="p-4 border border-blue-200 rounded-md grow"
onChange={handleQueryChange}
/>
</div>
<div className="flex flex-row justify-between gap-2 grow">
<button
className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
onClick={() => handleQuestionSubmit()}
type="submit"
>
Submit
</button>
</div>
<div className="flex flex-row justify-center gap-2 grow">
<input
type="checkbox"
onChange={(event) => setSimbaMode(event.target.checked)}
/>
<p>simba mode?</p>
</div>
{loading ? (
<div className="flex flex-col w-full animate-pulse gap-2">
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
</div> </div>
<div className="flex flex-row gap-2 w-full"> <div className="flex flex-row justify-between gap-2 grow">
<div className="bg-gray-400 w-1/3 p-3 rounded-lg" /> <textarea
<div className="bg-gray-400 w-2/3 p-3 rounded-lg" /> type="text"
className="p-4 border border-blue-200 rounded-md grow bg-white"
onChange={handleQueryChange}
/>
</div> </div>
<div className="flex flex-row justify-between gap-2 grow">
<button
className="p-4 border border-blue-400 bg-blue-200 hover:bg-blue-400 cursor-pointer rounded-md flex-grow"
onClick={() => handleQuestionSubmit()}
type="submit"
>
Submit
</button>
</div>
<div className="flex flex-row justify-center gap-2 grow">
<input
type="checkbox"
onChange={(event) =>
setSimbaMode(event.target.checked)
}
/>
<p>simba mode?</p>
</div>
{loading ? (
<div className="flex flex-col w-full animate-pulse gap-2">
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
<div className="bg-gray-400 w-1/2 p-3 rounded-lg" />
</div>
<div className="flex flex-row gap-2 w-full">
<div className="bg-gray-400 w-1/3 p-3 rounded-lg" />
<div className="bg-gray-400 w-2/3 p-3 rounded-lg" />
</div>
</div>
) : (
<div className="flex flex-col">
<ReactMarkdown>{answer}</ReactMarkdown>
</div>
)}
</div> </div>
) : ( </div>
<div className="flex flex-col">
<ReactMarkdown>{answer}</ReactMarkdown>
</div>
)}
</div> </div>
</div> </div>
); );

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

View File

@@ -11,8 +11,8 @@ class PaperlessNGXService:
def __init__(self): def __init__(self):
self.base_url = os.getenv("BASE_URL") self.base_url = os.getenv("BASE_URL")
self.token = os.getenv("PAPERLESS_TOKEN") self.token = os.getenv("PAPERLESS_TOKEN")
self.url = f"http://{os.getenv("BASE_URL")}/api/documents/?query=simba" self.url = f"http://{os.getenv('BASE_URL')}/api/documents/?query=simba"
self.headers = {"Authorization": f"Token {os.getenv("PAPERLESS_TOKEN")}"} self.headers = {"Authorization": f"Token {os.getenv('PAPERLESS_TOKEN')}"}
def get_data(self): def get_data(self):
print(f"Getting data from: {self.url}") print(f"Getting data from: {self.url}")
@@ -20,12 +20,12 @@ class PaperlessNGXService:
return r.json()["results"] return r.json()["results"]
def get_doc_by_id(self, doc_id: int): def get_doc_by_id(self, doc_id: int):
url = f"http://{os.getenv("BASE_URL")}/api/documents/{doc_id}/" url = f"http://{os.getenv('BASE_URL')}/api/documents/{doc_id}/"
r = httpx.get(url, headers=self.headers) r = httpx.get(url, headers=self.headers)
return r.json() return r.json()
def download_pdf_from_id(self, id: int) -> str: def download_pdf_from_id(self, id: int) -> str:
download_url = f"http://{os.getenv("BASE_URL")}/api/documents/{id}/download/" download_url = f"http://{os.getenv('BASE_URL')}/api/documents/{id}/download/"
response = httpx.get( response = httpx.get(
download_url, headers=self.headers, follow_redirects=True, timeout=30 download_url, headers=self.headers, follow_redirects=True, timeout=30
) )
@@ -39,10 +39,23 @@ class PaperlessNGXService:
return pdf_to_process return pdf_to_process
def upload_cleaned_content(self, document_id, data): def upload_cleaned_content(self, document_id, data):
PUTS_URL = f"http://{os.getenv("BASE_URL")}/api/documents/{document_id}/" PUTS_URL = f"http://{os.getenv('BASE_URL')}/api/documents/{document_id}/"
r = httpx.put(PUTS_URL, headers=self.headers, data=data) r = httpx.put(PUTS_URL, headers=self.headers, data=data)
r.raise_for_status() r.raise_for_status()
def upload_description(
    self,
    description_filepath,
    file,
    title,
    exif_date: str,
    document_type: int = 3,
    tags=(7,),
):
    """Upload a text description to Paperless-ngx as a new document.

    Args:
        description_filepath: Path of the description file; only its base
            name is sent, as the uploaded file's name.
        file: Open binary file object containing the description text.
        title: Title to give the new document.
        exif_date: Creation date to record for the document, as a string.
        document_type: Paperless document-type id. The default (3) is
            specific to the target instance -- TODO confirm/make configurable.
        tags: Iterable of Paperless tag ids. The default (7) is likewise
            instance-specific.

    Raises:
        httpx.HTTPStatusError: If Paperless responds with an error status.
    """
    POST_URL = f"http://{os.getenv('BASE_URL')}/api/documents/post_document/"
    files = {
        "document": (os.path.basename(description_filepath), file, "text/plain")
    }
    data = {
        "title": title,
        # The Paperless upload endpoint names this form field "created".
        "created": exif_date,
        "document_type": document_type,
        "tags": list(tags),
    }

    r = httpx.post(POST_URL, headers=self.headers, data=data, files=files)
    r.raise_for_status()
if __name__ == "__main__": if __name__ == "__main__":
pp = PaperlessNGXService() pp = PaperlessNGXService()

86
uv.lock generated
View File

@@ -108,6 +108,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
] ]
[[package]]
name = "black"
version = "25.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click" },
{ name = "mypy-extensions" },
{ name = "packaging" },
{ name = "pathspec" },
{ name = "platformdirs" },
{ name = "pytokens" },
]
sdist = { url = "https://files.pythonhosted.org/packages/4b/43/20b5c90612d7bdb2bdbcceeb53d588acca3bb8f0e4c5d5c751a2c8fdd55a/black-25.9.0.tar.gz", hash = "sha256:0474bca9a0dd1b51791fcc507a4e02078a1c63f6d4e4ae5544b9848c7adfb619", size = 648393, upload-time = "2025-09-19T00:27:37.758Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/48/99/3acfea65f5e79f45472c45f87ec13037b506522719cd9d4ac86484ff51ac/black-25.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0172a012f725b792c358d57fe7b6b6e8e67375dd157f64fa7a3097b3ed3e2175", size = 1742165, upload-time = "2025-09-19T00:34:10.402Z" },
{ url = "https://files.pythonhosted.org/packages/3a/18/799285282c8236a79f25d590f0222dbd6850e14b060dfaa3e720241fd772/black-25.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3bec74ee60f8dfef564b573a96b8930f7b6a538e846123d5ad77ba14a8d7a64f", size = 1581259, upload-time = "2025-09-19T00:32:49.685Z" },
{ url = "https://files.pythonhosted.org/packages/f1/ce/883ec4b6303acdeca93ee06b7622f1fa383c6b3765294824165d49b1a86b/black-25.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b756fc75871cb1bcac5499552d771822fd9db5a2bb8db2a7247936ca48f39831", size = 1655583, upload-time = "2025-09-19T00:30:44.505Z" },
{ url = "https://files.pythonhosted.org/packages/21/17/5c253aa80a0639ccc427a5c7144534b661505ae2b5a10b77ebe13fa25334/black-25.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:846d58e3ce7879ec1ffe816bb9df6d006cd9590515ed5d17db14e17666b2b357", size = 1343428, upload-time = "2025-09-19T00:32:13.839Z" },
{ url = "https://files.pythonhosted.org/packages/1b/46/863c90dcd3f9d41b109b7f19032ae0db021f0b2a81482ba0a1e28c84de86/black-25.9.0-py3-none-any.whl", hash = "sha256:474b34c1342cdc157d307b56c4c65bce916480c4a8f6551fdc6bf9b486a7c4ae", size = 203363, upload-time = "2025-09-19T00:27:35.724Z" },
]
[[package]] [[package]]
name = "blinker" name = "blinker"
version = "1.9.0" version = "1.9.0"
@@ -753,6 +774,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
] ]
[[package]]
name = "mypy-extensions"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
]
[[package]] [[package]]
name = "numpy" name = "numpy"
version = "2.3.3" version = "2.3.3"
@@ -1002,6 +1032,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
] ]
[[package]]
name = "pathspec"
version = "0.12.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
]
[[package]] [[package]]
name = "pillow" name = "pillow"
version = "11.3.0" version = "11.3.0"
@@ -1057,6 +1096,40 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" },
] ]
[[package]]
name = "pillow-heif"
version = "1.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pillow" },
]
sdist = { url = "https://files.pythonhosted.org/packages/64/65/77284daf2a8a2849b9040889bd8e1b845e693ed97973a28ba2122b8922ad/pillow_heif-1.1.1.tar.gz", hash = "sha256:f60e8c8a8928556104cec4fff39d43caa1da105625bdb53b11ce3c89d09b6bde", size = 18271952, upload-time = "2025-09-30T16:42:24.485Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/1d/2ea075d537b4ac9f5fb0c53fd543a764f5f1dee1fe6bea8fb5b34018cf94/pillow_heif-1.1.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:8269cae2e0232f73bda5128181a42fbbb562c29b76fbcced22fef70a61b94dbe", size = 4696826, upload-time = "2025-09-30T16:41:35.281Z" },
{ url = "https://files.pythonhosted.org/packages/2e/6f/860fab6d6e6f04f13b97a8d9150816fa16feb3f7a2fe2d8ab4b460adc711/pillow_heif-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:08787cc64b4a519789a348f137b914981ce520d4b906e09e2b8e974c87e3e215", size = 3451076, upload-time = "2025-09-30T16:41:36.506Z" },
{ url = "https://files.pythonhosted.org/packages/b1/9a/711f77c7c6e00fa1ae0e890a36a5be03c47170b6cbb88fc92761bee0fff5/pillow_heif-1.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac0fc8523a77c351991f78766d41290241dd87fbe036b6f777c49f2bd3561119", size = 5773389, upload-time = "2025-09-30T16:41:37.65Z" },
{ url = "https://files.pythonhosted.org/packages/50/c8/50e2d1adede807dc1d3b35f2cfa28d7f8e73e9d56cb560dc94b1d7053b75/pillow_heif-1.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18db6c78b8fa52065339ffb69739f5c45748c0b5f836349f0aba786f7bb905ab", size = 5504774, upload-time = "2025-09-30T16:41:38.849Z" },
{ url = "https://files.pythonhosted.org/packages/03/c2/0fa0ebaaec2a7e548989b84a2561300137d9999fc780d24ad7d6d4ef9417/pillow_heif-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c5db8a8ee7ee4b1311f81d223d32538d63a73adc2ece7610a9f19519856c8e68", size = 6809350, upload-time = "2025-09-30T16:41:40.455Z" },
{ url = "https://files.pythonhosted.org/packages/85/cc/b0eee2b939a362dcccb96483f8d172b64df192ec93445103be04634255c8/pillow_heif-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a2e6d4f7209aade2d55bbbcdbbbe623118722bcc7a12edef15cf4ee0d8586c3e", size = 6431750, upload-time = "2025-09-30T16:41:42.418Z" },
{ url = "https://files.pythonhosted.org/packages/dd/0b/a559ad48a5c03db5ecdc7c8b8dd04df3cb1072c0f983bcaebd26e1e63442/pillow_heif-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:ff158ff338082d39864abd31c69ae2ee57de3f193c85ccbe365f4d7260712229", size = 5422353, upload-time = "2025-09-30T16:41:44.15Z" },
{ url = "https://files.pythonhosted.org/packages/8e/f8/6c3fd8a28ea16236d40d6885b3babd801a2e7bdb73ee52a293eb34de7afc/pillow_heif-1.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:7f19389ffeb3866f95370eb917e6a32706c110a9fa670daefb63b5660948a82e", size = 4696793, upload-time = "2025-09-30T16:41:45.744Z" },
{ url = "https://files.pythonhosted.org/packages/28/09/f2ffdac98465d00b6244c4333d2f73e815351beb6fa1d22f489797f0411c/pillow_heif-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d5fa5539ff3c7bbe64aa446c10bf10f6d8c1604997a66b195bec02e2965eb10", size = 3451160, upload-time = "2025-09-30T16:41:47.111Z" },
{ url = "https://files.pythonhosted.org/packages/84/93/a801624eb86e8e0a2a6212a10da6c69beb7060be569fe36100d96a2d9e2c/pillow_heif-1.1.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9b08c81602ffd660cd27456fbfa3cbf396cf23bb39d3015cc7a6cd56ade82fd", size = 5773568, upload-time = "2025-09-30T16:41:48.65Z" },
{ url = "https://files.pythonhosted.org/packages/e8/fd/7d619b7b9386abd6228b1465450ea0bb8a3875b1e22c4f9e5bbd598224ae/pillow_heif-1.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0f2d68af87d5e1f6af0db021b61f62e456f413eba98ea7723d7f49f2a6f1f01", size = 5504865, upload-time = "2025-09-30T16:41:50.051Z" },
{ url = "https://files.pythonhosted.org/packages/44/72/35b5a8a5cbdcb38968328a8d8f2385f38328141dca6dc52d8e192a36e256/pillow_heif-1.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e486b15696a958a04178aa9ff7f7db4f803d1ec7bbded924671576125c052ed5", size = 6809536, upload-time = "2025-09-30T16:41:51.309Z" },
{ url = "https://files.pythonhosted.org/packages/23/70/fc0e0cc6b864f53be2833b23cadd1d1a480a51d6b2d5efd5c4c119e8112e/pillow_heif-1.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a37999e53c0cd32401667303e0b34c43240c32530809827091fabc7eb04d7cad", size = 6431784, upload-time = "2025-09-30T16:41:52.602Z" },
{ url = "https://files.pythonhosted.org/packages/96/0e/af38e5cbca622fceaa1ee8eba8e68b3c6bf1bd6e6a37eca3817bf3dcebdc/pillow_heif-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:1d35e973b2463b03f7b0bd5c898c7a424a46d69f7c20a9c251b322dfe4f45068", size = 5577269, upload-time = "2025-09-30T16:41:54.206Z" },
]
[[package]]
name = "platformdirs"
version = "4.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" },
]
[[package]] [[package]]
name = "posthog" name = "posthog"
version = "5.4.0" version = "5.4.0"
@@ -1312,6 +1385,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" },
] ]
[[package]]
name = "pytokens"
version = "0.1.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/30/5f/e959a442435e24f6fb5a01aec6c657079ceaca1b3baf18561c3728d681da/pytokens-0.1.10.tar.gz", hash = "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044", size = 12171, upload-time = "2025-02-19T14:51:22.001Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/e5/63bed382f6a7a5ba70e7e132b8b7b8abbcf4888ffa6be4877698dcfbed7d/pytokens-0.1.10-py3-none-any.whl", hash = "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b", size = 12046, upload-time = "2025-02-19T14:51:18.694Z" },
]
[[package]] [[package]]
name = "pyyaml" name = "pyyaml"
version = "6.0.3" version = "6.0.3"
@@ -1353,12 +1435,14 @@ name = "raggr"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "black" },
{ name = "chromadb" }, { name = "chromadb" },
{ name = "flask" }, { name = "flask" },
{ name = "httpx" }, { name = "httpx" },
{ name = "ollama" }, { name = "ollama" },
{ name = "openai" }, { name = "openai" },
{ name = "pillow" }, { name = "pillow" },
{ name = "pillow-heif" },
{ name = "pydantic" }, { name = "pydantic" },
{ name = "pymupdf" }, { name = "pymupdf" },
{ name = "python-dotenv" }, { name = "python-dotenv" },
@@ -1366,12 +1450,14 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "black", specifier = ">=25.9.0" },
{ name = "chromadb", specifier = ">=1.1.0" }, { name = "chromadb", specifier = ">=1.1.0" },
{ name = "flask", specifier = ">=3.1.2" }, { name = "flask", specifier = ">=3.1.2" },
{ name = "httpx", specifier = ">=0.28.1" }, { name = "httpx", specifier = ">=0.28.1" },
{ name = "ollama", specifier = ">=0.6.0" }, { name = "ollama", specifier = ">=0.6.0" },
{ name = "openai", specifier = ">=2.0.1" }, { name = "openai", specifier = ">=2.0.1" },
{ name = "pillow", specifier = ">=10.0.0" }, { name = "pillow", specifier = ">=10.0.0" },
{ name = "pillow-heif", specifier = ">=1.1.1" },
{ name = "pydantic", specifier = ">=2.11.9" }, { name = "pydantic", specifier = ">=2.11.9" },
{ name = "pymupdf", specifier = ">=1.24.0" }, { name = "pymupdf", specifier = ">=1.24.0" },
{ name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-dotenv", specifier = ">=1.0.0" },