This commit is contained in:
2025-08-07 17:43:24 -04:00
parent fc504d3e9c
commit 679cfb08e4
5 changed files with 294 additions and 32 deletions

View File

@@ -1,4 +1,5 @@
import os
import tempfile
import httpx
from dotenv import load_dotenv
@@ -18,6 +19,30 @@ class PaperlessNGXService:
r = httpx.get(self.url, headers=self.headers)
return r.json()["results"]
def get_doc_by_id(self, doc_id: int):
    """Fetch a single document record from the documents API.

    Args:
        doc_id: Identifier of the document to look up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Resolve the host first: nesting double quotes inside a double-quoted
    # f-string only parses on Python 3.12+.
    base_url = os.getenv("BASE_URL")
    url = f"http://{base_url}/api/documents/{doc_id}/"
    r = httpx.get(url, headers=self.headers)
    # Fail loudly on error responses, like the other request methods here,
    # instead of returning an error payload as if it were a document.
    r.raise_for_status()
    return r.json()
def download_pdf_from_id(self, id: int) -> str:
    """Download a document's file into a temporary ``.pdf`` file.

    Args:
        id: Identifier of the document to download. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        Path of the temporary file holding the response body. The file is
        created with ``delete=False``, so it stays on disk after this
        method returns and the caller is responsible for removing it.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Resolve the host first: nesting double quotes inside a double-quoted
    # f-string only parses on Python 3.12+.
    base_url = os.getenv("BASE_URL")
    download_url = f"http://{base_url}/api/documents/{id}/download/"
    response = httpx.get(
        download_url, headers=self.headers, follow_redirects=True, timeout=30
    )
    response.raise_for_status()

    # The context manager closes the handle even when the write fails, so
    # no file descriptor is leaked on error.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(response.content)
    return temp_file.name
def upload_cleaned_content(self, document_id, data):
    """Send ``data`` to the document's API endpoint with an HTTP PUT.

    Args:
        document_id: Identifier of the document being updated.
        data: Payload passed to ``httpx.put`` as its ``data`` argument.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    host = os.getenv("BASE_URL")
    endpoint = f"http://{host}/api/documents/{document_id}/"
    httpx.put(endpoint, headers=self.headers, data=data).raise_for_status()
if __name__ == "__main__":
pp = PaperlessNGXService()