Fix datetime serialization in Obsidian metadata for pgvector

YAML frontmatter can contain datetime objects which aren't JSON
serializable. Add _make_serializable() to coerce all metadata values
before storing in pgvector.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-31 07:11:29 -04:00
parent 869de1c250
commit 1e6bc536b4
+19 -6
View File
@@ -116,6 +116,17 @@ async def fetch_documents_from_paperless_ngx() -> list[Document]:
return documents
def _make_serializable(value):
"""Convert a value to a JSON-serializable type."""
if isinstance(value, (str, int, float, bool, type(None))):
return value
if isinstance(value, (list, tuple)):
return [_make_serializable(v) for v in value]
if isinstance(value, dict):
return {k: _make_serializable(v) for k, v in value.items()}
return str(value)
def _sanitize_text(text_content: str) -> str:
"""Strip non-printable and invalid characters that break embedding tokenizers."""
# Remove null bytes and control characters (keep newlines and tabs)
@@ -174,8 +185,6 @@ async def fetch_obsidian_documents() -> list[Document]:
parsed = obsidian_service.parse_markdown(content, md_path)
# Create LangChain Document with obsidian source
document = Document(
page_content=parsed["content"],
metadata = {
"source": "obsidian",
"filepath": parsed["filepath"],
@@ -187,7 +196,10 @@ async def fetch_obsidian_documents() -> list[Document]:
for k, v in parsed["metadata"].items()
if k not in ["created_at", "created_by"]
},
},
}
document = Document(
page_content=parsed["content"],
metadata=_make_serializable(metadata),
)
documents.append(document)
@@ -289,8 +301,6 @@ async def sync_obsidian_documents() -> dict[str, int]:
with open(filepath, "r", encoding="utf-8") as f:
content = f.read()
parsed = obsidian_service.parse_markdown(content, filepath)
document = Document(
page_content=parsed["content"],
metadata = {
"source": "obsidian",
"filepath": parsed["filepath"],
@@ -302,7 +312,10 @@ async def sync_obsidian_documents() -> dict[str, int]:
for k, v in parsed["metadata"].items()
if k not in ["created_at", "created_by"]
},
},
}
document = Document(
page_content=parsed["content"],
metadata=_make_serializable(metadata),
)
documents.append(document)
except Exception as e: