From 1e6bc536b439ad7e212ed0b6518af00de041d28b Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Sun, 31 May 2026 07:11:29 -0400 Subject: [PATCH] Fix datetime serialization in Obsidian metadata for pgvector YAML frontmatter can contain datetime objects which aren't JSON serializable. Add _make_serializable() to coerce all metadata values before storing in pgvector. Co-Authored-By: Claude Opus 4.6 --- blueprints/rag/logic.py | 61 +++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/blueprints/rag/logic.py b/blueprints/rag/logic.py index 28f2795..21f02c3 100644 --- a/blueprints/rag/logic.py +++ b/blueprints/rag/logic.py @@ -116,6 +116,17 @@ async def fetch_documents_from_paperless_ngx() -> list[Document]: return documents +def _make_serializable(value): + """Convert a value to a JSON-serializable type.""" + if isinstance(value, (str, int, float, bool, type(None))): + return value + if isinstance(value, (list, tuple)): + return [_make_serializable(v) for v in value] + if isinstance(value, dict): + return {k: _make_serializable(v) for k, v in value.items()} + return str(value) + + def _sanitize_text(text_content: str) -> str: """Strip non-printable and invalid characters that break embedding tokenizers.""" # Remove null bytes and control characters (keep newlines and tabs) @@ -174,20 +185,21 @@ async def fetch_obsidian_documents() -> list[Document]: parsed = obsidian_service.parse_markdown(content, md_path) # Create LangChain Document with obsidian source + metadata = { + "source": "obsidian", + "filepath": parsed["filepath"], + "tags": parsed["tags"], + "created_at": parsed["metadata"].get("created_at"), + "indexed_at": time.time(), + **{ + k: v + for k, v in parsed["metadata"].items() + if k not in ["created_at", "created_by"] + }, + } document = Document( page_content=parsed["content"], - metadata={ - "source": "obsidian", - "filepath": parsed["filepath"], - "tags": parsed["tags"], - "created_at": parsed["metadata"].get("created_at"), - "indexed_at": time.time(), - **{ - k: v - for k, v in parsed["metadata"].items() - if k not in ["created_at", "created_by"] - }, - }, + metadata=_make_serializable(metadata), ) documents.append(document) @@ -289,20 +301,21 @@ async def sync_obsidian_documents() -> dict[str, int]: with open(filepath, "r", encoding="utf-8") as f: content = f.read() parsed = obsidian_service.parse_markdown(content, filepath) + metadata = { + "source": "obsidian", + "filepath": parsed["filepath"], + "tags": parsed["tags"], + "created_at": parsed["metadata"].get("created_at"), + "indexed_at": time.time(), + **{ + k: v + for k, v in parsed["metadata"].items() + if k not in ["created_at", "created_by"] + }, + } document = Document( page_content=parsed["content"], - metadata={ - "source": "obsidian", - "filepath": parsed["filepath"], - "tags": parsed["tags"], - "created_at": parsed["metadata"].get("created_at"), - "indexed_at": time.time(), - **{ - k: v - for k, v in parsed["metadata"].items() - if k not in ["created_at", "created_by"] - }, - }, + metadata=_make_serializable(metadata), ) documents.append(document) except Exception as e: