adding image processing pipeline immich -> paperless
This commit is contained in:
19
app.py
19
app.py
@@ -4,21 +4,27 @@ from flask import Flask, request, jsonify, render_template, send_from_directory
|
|||||||
|
|
||||||
from main import consult_simba_oracle
|
from main import consult_simba_oracle
|
||||||
|
|
||||||
app = Flask(__name__, static_folder="raggr-frontend/dist/static", template_folder="raggr-frontend/dist")
|
app = Flask(
|
||||||
|
__name__,
|
||||||
|
static_folder="raggr-frontend/dist/static",
|
||||||
|
template_folder="raggr-frontend/dist",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Serve React static files
|
# Serve React static files
|
||||||
@app.route('/static/<path:filename>')
|
@app.route("/static/<path:filename>")
|
||||||
def static_files(filename):
|
def static_files(filename):
|
||||||
return send_from_directory(app.static_folder, filename)
|
return send_from_directory(app.static_folder, filename)
|
||||||
|
|
||||||
|
|
||||||
# Serve the React app for all routes (catch-all)
|
# Serve the React app for all routes (catch-all)
|
||||||
@app.route('/', defaults={'path': ''})
|
@app.route("/", defaults={"path": ""})
|
||||||
@app.route('/<path:path>')
|
@app.route("/<path:path>")
|
||||||
def serve_react_app(path):
|
def serve_react_app(path):
|
||||||
if path and os.path.exists(os.path.join(app.template_folder, path)):
|
if path and os.path.exists(os.path.join(app.template_folder, path)):
|
||||||
return send_from_directory(app.template_folder, path)
|
return send_from_directory(app.template_folder, path)
|
||||||
return render_template('index.html')
|
return render_template("index.html")
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/query", methods=["POST"])
|
@app.route("/api/query", methods=["POST"])
|
||||||
def query():
|
def query():
|
||||||
@@ -26,12 +32,13 @@ def query():
|
|||||||
query = data.get("query")
|
query = data.get("query")
|
||||||
return jsonify({"response": consult_simba_oracle(query)})
|
return jsonify({"response": consult_simba_oracle(query)})
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/ingest", methods=["POST"])
|
@app.route("/api/ingest", methods=["POST"])
|
||||||
def webhook():
|
def webhook():
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
print(data)
|
print(data)
|
||||||
return jsonify({"status": "received"})
|
return jsonify({"status": "received"})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(host="0.0.0.0", port=8080, debug=True)
|
app.run(host="0.0.0.0", port=8080, debug=True)
|
||||||
|
|
||||||
|
|||||||
81
image_process.py
Normal file
81
image_process.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
from ollama import Client
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from PIL import Image, ExifTags
|
||||||
|
from pillow_heif import register_heif_opener
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
register_heif_opener()
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line interface: one required positional argument holding the path
# of the image to describe.
parser = argparse.ArgumentParser(
    prog="SimbaImageProcessor",
    description="Describe a photo of Simba using an Ollama vision model.",
    epilog="HEIC/HEIF images are converted to JPEG before being analyzed.",
)

parser.add_argument("filepath", help="path to the image file to describe")
|
||||||
|
|
||||||
|
client = Client(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
|
||||||
|
|
||||||
|
class SimbaImageDescription(BaseModel):
    """Structured reply expected from the vision model.

    The JSON schema of this model is passed to Ollama as the ``format``
    argument, and the model's reply is validated against it.
    """

    # Date of the image as reported by the model (free-form string).
    image_date: str
    # The model's notes about what the image shows.
    description: str
|
||||||
|
|
||||||
|
def describe_simba_image(input):
    """Describe a photo of Simba with an Ollama vision model.

    EXIF tags are read from the file at ``input`` and embedded in the prompt.
    HEIC/HEIF files are additionally converted to a JPEG written next to the
    original, and that JPEG is the image sent to the model.

    Args:
        input: Path to the image file. The name shadows the builtin but is
            kept because callers pass it by keyword.

    Returns:
        A ``SimbaImageDescription`` parsed from the model's JSON reply.
    """
    logging.info("Opening image of Simba ...")

    # splitext only strips the final extension, so dots elsewhere in the path
    # (e.g. "./photos/IMG.1.heic") no longer truncate the output filename.
    root, extension = os.path.splitext(input)
    image_path = input

    with Image.open(input) as img:
        logging.info("Extracting EXIF...")
        exif = {
            ExifTags.TAGS[tag_id]: value
            for tag_id, value in img.getexif().items()
            if tag_id in ExifTags.TAGS
        }

        if extension.lower() in (".heic", ".heif"):
            image_path = root + ".jpg"
            # JPEG cannot store an alpha channel; normalise to RGB first.
            img.convert("RGB").save(image_path, "JPEG")

    # MakerNote is dropped before the EXIF dict is logged and put in the prompt.
    exif.pop("MakerNote", None)

    logging.info(exif)

    prompt = f"Simba is an orange cat belonging to Ryan Chen. In 2025, they lived in New York. In 2024, they lived in California. Analyze the following image and tell me what Simba seems to be doing. Be extremely descriptive about Simba, things in the background, and the setting of the image. I will also include the EXIF data of the image, please use it to help you determine information about Simba. EXIF: {exif}. Put the notes in the description field and the date in the image_date field."

    logging.info("Sending info to Ollama ...")
    response = client.chat(
        model="gemma3:4b",
        messages=[
            {
                "role": "system",
                "content": "you are a very shrewd and descriptive note taker. all of your responses will be formatted like notes in bullet points. be very descriptive. do not leave a single thing out.",
            },
            {"role": "user", "content": prompt, "images": [image_path]},
        ],
        # Constrain the reply to the SimbaImageDescription JSON schema.
        format=SimbaImageDescription.model_json_schema(),
    )

    return SimbaImageDescription.model_validate_json(response["message"]["content"])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # "filepath" is a required positional, so argparse exits with a usage
    # error before reaching this point when it is missing.
    args = parser.parse_args()
    logging.info("Describing image %s", args.filepath)
    result = describe_simba_image(input=args.filepath)
    # Emit the structured description so the CLI produces visible output.
    print(result.model_dump_json(indent=2))
|
||||||
98
index_immich.py
Normal file
98
index_immich.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
import httpx
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import logging
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
from image_process import describe_simba_image
|
||||||
|
from request import PaperlessNGXService
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Configuration from environment variables
|
||||||
|
IMMICH_URL = os.getenv("IMMICH_URL", "http://localhost:2283")
|
||||||
|
API_KEY = os.getenv("IMMICH_API_KEY")
|
||||||
|
PERSON_NAME = os.getenv("PERSON_NAME", "Simba") # Name of the tagged person/pet
|
||||||
|
DOWNLOAD_DIR = os.getenv("DOWNLOAD_DIR", "./simba_photos")
|
||||||
|
|
||||||
|
# Set up headers
|
||||||
|
headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Pipeline: find photos in Immich, download one, describe it with the
    # vision model, and upload the description to Paperless-ngx.
    ppngx = PaperlessNGXService()

    # Look up the tagged person/pet. The id is only logged for now; the asset
    # selection below relies on smart search.
    people_response = httpx.get(
        f"{IMMICH_URL}/api/search/person",
        params={"name": PERSON_NAME},
        headers=headers,
    )
    people_response.raise_for_status()
    people = people_response.json()
    if people:
        logging.info("Found %s in Immich with id %s", PERSON_NAME, people[0]["id"])
    else:
        logging.warning("No person named %s found in Immich", PERSON_NAME)

    # Collect image assets from both smart-search queries, keyed by asset id so
    # that an asset matched by both queries is only recorded once.
    # TODO: only the first page of each search is used; "nextPage" in the
    # response is not followed yet.
    asset_search = f"{IMMICH_URL}/api/search/smart"
    ids = {}
    for search_query in ("orange cat", "simba"):
        results = httpx.post(
            asset_search, headers=headers, json={"query": search_query}
        )
        results.raise_for_status()
        assets = results.json()["assets"]
        logging.info(
            "Query %r matched %s assets", search_query, assets.get("total")
        )
        for asset in assets["items"]:
            if asset["type"] == "IMAGE":
                ids[asset["id"]] = asset.get("originalFileName")

    asset_ids = list(ids)
    if not asset_ids:
        raise SystemExit("No image assets found in Immich")

    # Process a single asset for now: the second match when there is one,
    # otherwise the only match.
    immich_asset_id = asset_ids[1] if len(asset_ids) > 1 else asset_ids[0]
    # Fall back to the asset id when Immich reports no original file name.
    immich_filename = ids[immich_asset_id] or immich_asset_id

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    path = os.path.join(DOWNLOAD_DIR, os.path.basename(immich_filename))

    # Stream the original to disk. The file must be closed (flushed) before
    # the image is opened again for processing.
    with httpx.stream(
        "GET",
        f"{IMMICH_URL}/api/assets/{immich_asset_id}/original",
        headers=headers,
    ) as response:
        response.raise_for_status()
        with open(path, "wb") as image_file:
            for chunk in response.iter_bytes(chunk_size=8192):
                image_file.write(chunk)

    logging.info("Processing image ...")
    description = describe_simba_image(path)

    description_title = "SIMBA_DESCRIBE_001.txt"
    description_filepath = os.path.join(DOWNLOAD_DIR, description_title)
    with open(description_filepath, "w", encoding="utf-8") as description_file:
        description_file.write(description.description)

    with open(description_filepath, "rb") as description_file:
        ppngx.upload_description(
            description_filepath=description_filepath,
            file=description_file,
            title=description_title,
            exif_date=description.image_date,
        )

    logging.info("Processing complete. Deleting file.")
    os.remove(description_filepath)
|
||||||
33
main.py
33
main.py
@@ -33,14 +33,13 @@ parser.add_argument("query", type=str, help="questions about simba's health")
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--reindex", action="store_true", help="re-index the simba documents"
|
"--reindex", action="store_true", help="re-index the simba documents"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument("--index", help="index a file")
|
||||||
"--index", help="index a file"
|
|
||||||
)
|
|
||||||
|
|
||||||
ppngx = PaperlessNGXService()
|
ppngx = PaperlessNGXService()
|
||||||
|
|
||||||
openai_client = OpenAI()
|
openai_client = OpenAI()
|
||||||
|
|
||||||
|
|
||||||
def index_using_pdf_llm():
|
def index_using_pdf_llm():
|
||||||
files = ppngx.get_data()
|
files = ppngx.get_data()
|
||||||
for file in files:
|
for file in files:
|
||||||
@@ -80,13 +79,14 @@ def chunk_data(docs: list[dict[str, Union[str, Any]]], collection):
|
|||||||
print(docs[index]["original_file_name"])
|
print(docs[index]["original_file_name"])
|
||||||
metadata = {
|
metadata = {
|
||||||
"created_date": date_to_epoch(docs[index]["created_date"]),
|
"created_date": date_to_epoch(docs[index]["created_date"]),
|
||||||
"filename": docs[index]["original_file_name"]
|
"filename": docs[index]["original_file_name"],
|
||||||
}
|
}
|
||||||
chunker.chunk_document(
|
chunker.chunk_document(
|
||||||
document=text,
|
document=text,
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def chunk_text(texts: list[str], collection):
|
def chunk_text(texts: list[str], collection):
|
||||||
chunker = Chunker(collection)
|
chunker = Chunker(collection)
|
||||||
|
|
||||||
@@ -97,9 +97,11 @@ def chunk_text(texts: list[str], collection):
|
|||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def consult_oracle(input: str, collection):
|
def consult_oracle(input: str, collection):
|
||||||
print(input)
|
print(input)
|
||||||
import time
|
import time
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Ask
|
# Ask
|
||||||
@@ -122,7 +124,7 @@ def consult_oracle(input: str, collection):
|
|||||||
results = collection.query(
|
results = collection.query(
|
||||||
query_texts=[input],
|
query_texts=[input],
|
||||||
query_embeddings=embeddings,
|
query_embeddings=embeddings,
|
||||||
#where=metadata_filter,
|
# where=metadata_filter,
|
||||||
)
|
)
|
||||||
print(results)
|
print(results)
|
||||||
query_end = time.time()
|
query_end = time.time()
|
||||||
@@ -138,9 +140,15 @@ def consult_oracle(input: str, collection):
|
|||||||
response = openai_client.chat.completions.create(
|
response = openai_client.chat.completions.create(
|
||||||
model="gpt-4o-mini",
|
model="gpt-4o-mini",
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": "You are a helpful assistant that understands veterinary terms."},
|
{
|
||||||
{"role": "user", "content": f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}"}
|
"role": "system",
|
||||||
]
|
"content": "You are a helpful assistant that understands veterinary terms.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": f"Using the following data, help answer the user's query by providing as many details as possible. Using this data: {results}. Respond to this prompt: {input}",
|
||||||
|
},
|
||||||
|
],
|
||||||
)
|
)
|
||||||
llm_end = time.time()
|
llm_end = time.time()
|
||||||
print(f"LLM generation took {llm_end - llm_start:.2f} seconds")
|
print(f"LLM generation took {llm_end - llm_start:.2f} seconds")
|
||||||
@@ -181,7 +189,6 @@ if __name__ == "__main__":
|
|||||||
print("Done chunking documents")
|
print("Done chunking documents")
|
||||||
# index_using_pdf_llm()
|
# index_using_pdf_llm()
|
||||||
|
|
||||||
|
|
||||||
if args.index:
|
if args.index:
|
||||||
with open(args.index) as file:
|
with open(args.index) as file:
|
||||||
extension = args.index.split(".")[-1]
|
extension = args.index.split(".")[-1]
|
||||||
@@ -196,11 +203,11 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
if args.query:
|
if args.query:
|
||||||
print("Consulting oracle ...")
|
print("Consulting oracle ...")
|
||||||
print(consult_oracle(
|
print(
|
||||||
|
consult_oracle(
|
||||||
input=args.query,
|
input=args.query,
|
||||||
collection=simba_docs,
|
collection=simba_docs,
|
||||||
))
|
)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
print("please provide a query")
|
print("please provide a query")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -14,4 +14,6 @@ dependencies = [
|
|||||||
"pydantic>=2.11.9",
|
"pydantic>=2.11.9",
|
||||||
"pillow>=10.0.0",
|
"pillow>=10.0.0",
|
||||||
"pymupdf>=1.24.0",
|
"pymupdf>=1.24.0",
|
||||||
|
"black>=25.9.0",
|
||||||
|
"pillow-heif>=1.1.1",
|
||||||
]
|
]
|
||||||
|
|||||||
2
query.py
2
query.py
@@ -33,9 +33,11 @@ class GeneratedQuery(BaseModel):
|
|||||||
fields: list[str]
|
fields: list[str]
|
||||||
extracted_metadata_fields: str
|
extracted_metadata_fields: str
|
||||||
|
|
||||||
|
|
||||||
class Time(BaseModel):
|
class Time(BaseModel):
|
||||||
time: int
|
time: int
|
||||||
|
|
||||||
|
|
||||||
PROMPT = """
|
PROMPT = """
|
||||||
You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
|
You are an information specialist that processes user queries. The current year is 2025. The user queries are all about
|
||||||
a cat, Simba, and its records. The types of records are listed below. Using the query, extract the
|
a cat, Simba, and its records. The types of records are listed below. Using the query, extract the
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ const App = () => {
|
|||||||
setQuery(event.target.value);
|
setQuery(event.target.value);
|
||||||
};
|
};
|
||||||
return (
|
return (
|
||||||
|
<div className="bg-[url('./simba_cute.jpeg')] bg-cover bg-center bg-no-repeat h-screen bg-opacity-20">
|
||||||
|
<div className="bg-white/85 h-screen">
|
||||||
<div className="flex flex-row justify-center py-4">
|
<div className="flex flex-row justify-center py-4">
|
||||||
<div className="flex flex-col gap-4 min-w-xl max-w-xl">
|
<div className="flex flex-col gap-4 min-w-xl max-w-xl">
|
||||||
<div className="flex flex-row justify-center gap-2 grow">
|
<div className="flex flex-row justify-center gap-2 grow">
|
||||||
@@ -41,7 +43,7 @@ const App = () => {
|
|||||||
<div className="flex flex-row justify-between gap-2 grow">
|
<div className="flex flex-row justify-between gap-2 grow">
|
||||||
<textarea
|
<textarea
|
||||||
type="text"
|
type="text"
|
||||||
className="p-4 border border-blue-200 rounded-md grow"
|
className="p-4 border border-blue-200 rounded-md grow bg-white"
|
||||||
onChange={handleQueryChange}
|
onChange={handleQueryChange}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
@@ -57,7 +59,9 @@ const App = () => {
|
|||||||
<div className="flex flex-row justify-center gap-2 grow">
|
<div className="flex flex-row justify-center gap-2 grow">
|
||||||
<input
|
<input
|
||||||
type="checkbox"
|
type="checkbox"
|
||||||
onChange={(event) => setSimbaMode(event.target.checked)}
|
onChange={(event) =>
|
||||||
|
setSimbaMode(event.target.checked)
|
||||||
|
}
|
||||||
/>
|
/>
|
||||||
<p>simba mode?</p>
|
<p>simba mode?</p>
|
||||||
</div>
|
</div>
|
||||||
@@ -79,6 +83,8 @@ const App = () => {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
BIN
raggr-frontend/src/simba_cute.jpeg
Normal file
BIN
raggr-frontend/src/simba_cute.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.4 MiB |
BIN
raggr-frontend/src/simba_troll.jpeg
Normal file
BIN
raggr-frontend/src/simba_troll.jpeg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
23
request.py
23
request.py
@@ -11,8 +11,8 @@ class PaperlessNGXService:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.base_url = os.getenv("BASE_URL")
|
self.base_url = os.getenv("BASE_URL")
|
||||||
self.token = os.getenv("PAPERLESS_TOKEN")
|
self.token = os.getenv("PAPERLESS_TOKEN")
|
||||||
self.url = f"http://{os.getenv("BASE_URL")}/api/documents/?query=simba"
|
self.url = f"http://{os.getenv('BASE_URL')}/api/documents/?query=simba"
|
||||||
self.headers = {"Authorization": f"Token {os.getenv("PAPERLESS_TOKEN")}"}
|
self.headers = {"Authorization": f"Token {os.getenv('PAPERLESS_TOKEN')}"}
|
||||||
|
|
||||||
def get_data(self):
|
def get_data(self):
|
||||||
print(f"Getting data from: {self.url}")
|
print(f"Getting data from: {self.url}")
|
||||||
@@ -20,12 +20,12 @@ class PaperlessNGXService:
|
|||||||
return r.json()["results"]
|
return r.json()["results"]
|
||||||
|
|
||||||
def get_doc_by_id(self, doc_id: int):
|
def get_doc_by_id(self, doc_id: int):
|
||||||
url = f"http://{os.getenv("BASE_URL")}/api/documents/{doc_id}/"
|
url = f"http://{os.getenv('BASE_URL')}/api/documents/{doc_id}/"
|
||||||
r = httpx.get(url, headers=self.headers)
|
r = httpx.get(url, headers=self.headers)
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
||||||
def download_pdf_from_id(self, id: int) -> str:
|
def download_pdf_from_id(self, id: int) -> str:
|
||||||
download_url = f"http://{os.getenv("BASE_URL")}/api/documents/{id}/download/"
|
download_url = f"http://{os.getenv('BASE_URL')}/api/documents/{id}/download/"
|
||||||
response = httpx.get(
|
response = httpx.get(
|
||||||
download_url, headers=self.headers, follow_redirects=True, timeout=30
|
download_url, headers=self.headers, follow_redirects=True, timeout=30
|
||||||
)
|
)
|
||||||
@@ -39,10 +39,23 @@ class PaperlessNGXService:
|
|||||||
return pdf_to_process
|
return pdf_to_process
|
||||||
|
|
||||||
def upload_cleaned_content(self, document_id, data):
|
def upload_cleaned_content(self, document_id, data):
|
||||||
PUTS_URL = f"http://{os.getenv("BASE_URL")}/api/documents/{document_id}/"
|
PUTS_URL = f"http://{os.getenv('BASE_URL')}/api/documents/{document_id}/"
|
||||||
r = httpx.put(PUTS_URL, headers=self.headers, data=data)
|
r = httpx.put(PUTS_URL, headers=self.headers, data=data)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
|
||||||
|
def upload_description(self, description_filepath, file, title, exif_date: str):
    """Upload a text description to Paperless-ngx as a new document.

    Args:
        description_filepath: Path of the description file; its base name is
            sent as the uploaded file name.
        file: Open binary file object whose contents are uploaded.
        title: Title to give the new document.
        exif_date: Creation date for the document, sent as-is.

    Raises:
        httpx.HTTPStatusError: If Paperless-ngx answers with an error status.
    """
    POST_URL = f"http://{os.getenv('BASE_URL')}/api/documents/post_document/"
    files = {
        "document": (os.path.basename(description_filepath), file, "text/plain")
    }
    data = {
        "title": title,
        # Paperless-ngx names this form field "created".
        "created": exif_date,
        # NOTE(review): 3 and 7 are presumably the ids of a document type and
        # a tag in the target Paperless instance - confirm before reuse.
        "document_type": 3,
        "tags": [7],
    }

    r = httpx.post(POST_URL, headers=self.headers, data=data, files=files)
    r.raise_for_status()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
pp = PaperlessNGXService()
|
pp = PaperlessNGXService()
|
||||||
|
|||||||
86
uv.lock
generated
86
uv.lock
generated
@@ -108,6 +108,27 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
|
{ url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "black"
|
||||||
|
version = "25.9.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "click" },
|
||||||
|
{ name = "mypy-extensions" },
|
||||||
|
{ name = "packaging" },
|
||||||
|
{ name = "pathspec" },
|
||||||
|
{ name = "platformdirs" },
|
||||||
|
{ name = "pytokens" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/4b/43/20b5c90612d7bdb2bdbcceeb53d588acca3bb8f0e4c5d5c751a2c8fdd55a/black-25.9.0.tar.gz", hash = "sha256:0474bca9a0dd1b51791fcc507a4e02078a1c63f6d4e4ae5544b9848c7adfb619", size = 648393, upload-time = "2025-09-19T00:27:37.758Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/48/99/3acfea65f5e79f45472c45f87ec13037b506522719cd9d4ac86484ff51ac/black-25.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0172a012f725b792c358d57fe7b6b6e8e67375dd157f64fa7a3097b3ed3e2175", size = 1742165, upload-time = "2025-09-19T00:34:10.402Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3a/18/799285282c8236a79f25d590f0222dbd6850e14b060dfaa3e720241fd772/black-25.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3bec74ee60f8dfef564b573a96b8930f7b6a538e846123d5ad77ba14a8d7a64f", size = 1581259, upload-time = "2025-09-19T00:32:49.685Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f1/ce/883ec4b6303acdeca93ee06b7622f1fa383c6b3765294824165d49b1a86b/black-25.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b756fc75871cb1bcac5499552d771822fd9db5a2bb8db2a7247936ca48f39831", size = 1655583, upload-time = "2025-09-19T00:30:44.505Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/21/17/5c253aa80a0639ccc427a5c7144534b661505ae2b5a10b77ebe13fa25334/black-25.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:846d58e3ce7879ec1ffe816bb9df6d006cd9590515ed5d17db14e17666b2b357", size = 1343428, upload-time = "2025-09-19T00:32:13.839Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1b/46/863c90dcd3f9d41b109b7f19032ae0db021f0b2a81482ba0a1e28c84de86/black-25.9.0-py3-none-any.whl", hash = "sha256:474b34c1342cdc157d307b56c4c65bce916480c4a8f6551fdc6bf9b486a7c4ae", size = 203363, upload-time = "2025-09-19T00:27:35.724Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "blinker"
|
name = "blinker"
|
||||||
version = "1.9.0"
|
version = "1.9.0"
|
||||||
@@ -753,6 +774,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
|
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mypy-extensions"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "numpy"
|
name = "numpy"
|
||||||
version = "2.3.3"
|
version = "2.3.3"
|
||||||
@@ -1002,6 +1032,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
|
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pathspec"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pillow"
|
name = "pillow"
|
||||||
version = "11.3.0"
|
version = "11.3.0"
|
||||||
@@ -1057,6 +1096,40 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" },
|
{ url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pillow-heif"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "pillow" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/64/65/77284daf2a8a2849b9040889bd8e1b845e693ed97973a28ba2122b8922ad/pillow_heif-1.1.1.tar.gz", hash = "sha256:f60e8c8a8928556104cec4fff39d43caa1da105625bdb53b11ce3c89d09b6bde", size = 18271952, upload-time = "2025-09-30T16:42:24.485Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c2/1d/2ea075d537b4ac9f5fb0c53fd543a764f5f1dee1fe6bea8fb5b34018cf94/pillow_heif-1.1.1-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:8269cae2e0232f73bda5128181a42fbbb562c29b76fbcced22fef70a61b94dbe", size = 4696826, upload-time = "2025-09-30T16:41:35.281Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2e/6f/860fab6d6e6f04f13b97a8d9150816fa16feb3f7a2fe2d8ab4b460adc711/pillow_heif-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:08787cc64b4a519789a348f137b914981ce520d4b906e09e2b8e974c87e3e215", size = 3451076, upload-time = "2025-09-30T16:41:36.506Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b1/9a/711f77c7c6e00fa1ae0e890a36a5be03c47170b6cbb88fc92761bee0fff5/pillow_heif-1.1.1-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac0fc8523a77c351991f78766d41290241dd87fbe036b6f777c49f2bd3561119", size = 5773389, upload-time = "2025-09-30T16:41:37.65Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/50/c8/50e2d1adede807dc1d3b35f2cfa28d7f8e73e9d56cb560dc94b1d7053b75/pillow_heif-1.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18db6c78b8fa52065339ffb69739f5c45748c0b5f836349f0aba786f7bb905ab", size = 5504774, upload-time = "2025-09-30T16:41:38.849Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/03/c2/0fa0ebaaec2a7e548989b84a2561300137d9999fc780d24ad7d6d4ef9417/pillow_heif-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c5db8a8ee7ee4b1311f81d223d32538d63a73adc2ece7610a9f19519856c8e68", size = 6809350, upload-time = "2025-09-30T16:41:40.455Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/85/cc/b0eee2b939a362dcccb96483f8d172b64df192ec93445103be04634255c8/pillow_heif-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a2e6d4f7209aade2d55bbbcdbbbe623118722bcc7a12edef15cf4ee0d8586c3e", size = 6431750, upload-time = "2025-09-30T16:41:42.418Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/dd/0b/a559ad48a5c03db5ecdc7c8b8dd04df3cb1072c0f983bcaebd26e1e63442/pillow_heif-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:ff158ff338082d39864abd31c69ae2ee57de3f193c85ccbe365f4d7260712229", size = 5422353, upload-time = "2025-09-30T16:41:44.15Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8e/f8/6c3fd8a28ea16236d40d6885b3babd801a2e7bdb73ee52a293eb34de7afc/pillow_heif-1.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:7f19389ffeb3866f95370eb917e6a32706c110a9fa670daefb63b5660948a82e", size = 4696793, upload-time = "2025-09-30T16:41:45.744Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/28/09/f2ffdac98465d00b6244c4333d2f73e815351beb6fa1d22f489797f0411c/pillow_heif-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d5fa5539ff3c7bbe64aa446c10bf10f6d8c1604997a66b195bec02e2965eb10", size = 3451160, upload-time = "2025-09-30T16:41:47.111Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/84/93/a801624eb86e8e0a2a6212a10da6c69beb7060be569fe36100d96a2d9e2c/pillow_heif-1.1.1-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9b08c81602ffd660cd27456fbfa3cbf396cf23bb39d3015cc7a6cd56ade82fd", size = 5773568, upload-time = "2025-09-30T16:41:48.65Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e8/fd/7d619b7b9386abd6228b1465450ea0bb8a3875b1e22c4f9e5bbd598224ae/pillow_heif-1.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0f2d68af87d5e1f6af0db021b61f62e456f413eba98ea7723d7f49f2a6f1f01", size = 5504865, upload-time = "2025-09-30T16:41:50.051Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/44/72/35b5a8a5cbdcb38968328a8d8f2385f38328141dca6dc52d8e192a36e256/pillow_heif-1.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e486b15696a958a04178aa9ff7f7db4f803d1ec7bbded924671576125c052ed5", size = 6809536, upload-time = "2025-09-30T16:41:51.309Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/23/70/fc0e0cc6b864f53be2833b23cadd1d1a480a51d6b2d5efd5c4c119e8112e/pillow_heif-1.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a37999e53c0cd32401667303e0b34c43240c32530809827091fabc7eb04d7cad", size = 6431784, upload-time = "2025-09-30T16:41:52.602Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/96/0e/af38e5cbca622fceaa1ee8eba8e68b3c6bf1bd6e6a37eca3817bf3dcebdc/pillow_heif-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:1d35e973b2463b03f7b0bd5c898c7a424a46d69f7c20a9c251b322dfe4f45068", size = 5577269, upload-time = "2025-09-30T16:41:54.206Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "platformdirs"
|
||||||
|
version = "4.4.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "posthog"
|
name = "posthog"
|
||||||
version = "5.4.0"
|
version = "5.4.0"
|
||||||
@@ -1312,6 +1385,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" },
|
{ url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytokens"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/30/5f/e959a442435e24f6fb5a01aec6c657079ceaca1b3baf18561c3728d681da/pytokens-0.1.10.tar.gz", hash = "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044", size = 12171, upload-time = "2025-02-19T14:51:22.001Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/60/e5/63bed382f6a7a5ba70e7e132b8b7b8abbcf4888ffa6be4877698dcfbed7d/pytokens-0.1.10-py3-none-any.whl", hash = "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b", size = 12046, upload-time = "2025-02-19T14:51:18.694Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyyaml"
|
name = "pyyaml"
|
||||||
version = "6.0.3"
|
version = "6.0.3"
|
||||||
@@ -1353,12 +1435,14 @@ name = "raggr"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { virtual = "." }
|
source = { virtual = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "black" },
|
||||||
{ name = "chromadb" },
|
{ name = "chromadb" },
|
||||||
{ name = "flask" },
|
{ name = "flask" },
|
||||||
{ name = "httpx" },
|
{ name = "httpx" },
|
||||||
{ name = "ollama" },
|
{ name = "ollama" },
|
||||||
{ name = "openai" },
|
{ name = "openai" },
|
||||||
{ name = "pillow" },
|
{ name = "pillow" },
|
||||||
|
{ name = "pillow-heif" },
|
||||||
{ name = "pydantic" },
|
{ name = "pydantic" },
|
||||||
{ name = "pymupdf" },
|
{ name = "pymupdf" },
|
||||||
{ name = "python-dotenv" },
|
{ name = "python-dotenv" },
|
||||||
@@ -1366,12 +1450,14 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "black", specifier = ">=25.9.0" },
|
||||||
{ name = "chromadb", specifier = ">=1.1.0" },
|
{ name = "chromadb", specifier = ">=1.1.0" },
|
||||||
{ name = "flask", specifier = ">=3.1.2" },
|
{ name = "flask", specifier = ">=3.1.2" },
|
||||||
{ name = "httpx", specifier = ">=0.28.1" },
|
{ name = "httpx", specifier = ">=0.28.1" },
|
||||||
{ name = "ollama", specifier = ">=0.6.0" },
|
{ name = "ollama", specifier = ">=0.6.0" },
|
||||||
{ name = "openai", specifier = ">=2.0.1" },
|
{ name = "openai", specifier = ">=2.0.1" },
|
||||||
{ name = "pillow", specifier = ">=10.0.0" },
|
{ name = "pillow", specifier = ">=10.0.0" },
|
||||||
|
{ name = "pillow-heif", specifier = ">=1.1.1" },
|
||||||
{ name = "pydantic", specifier = ">=2.11.9" },
|
{ name = "pydantic", specifier = ">=2.11.9" },
|
||||||
{ name = "pymupdf", specifier = ">=1.24.0" },
|
{ name = "pymupdf", specifier = ">=1.24.0" },
|
||||||
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
||||||
|
|||||||
Reference in New Issue
Block a user