Fix datetime serialization in Obsidian metadata for pgvector
YAML frontmatter can contain datetime objects which aren't JSON serializable. Add _make_serializable() to coerce all metadata values before storing in pgvector. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+19
-6
@@ -116,6 +116,17 @@ async def fetch_documents_from_paperless_ngx() -> list[Document]:
|
||||
return documents
|
||||
|
||||
|
||||
def _make_serializable(value):
    """Recursively convert ``value`` into a JSON-serializable structure.

    Scalars that JSON supports natively (``str``, ``int``, ``float``,
    ``bool``, ``None``) are returned unchanged. Lists and tuples become
    lists, dicts are rebuilt with every key and value coerced, and anything
    else (for example ``datetime`` objects parsed from YAML frontmatter) is
    replaced by ``str(value)``.

    Args:
        value: Arbitrary metadata value, possibly nested.

    Returns:
        An equivalent structure made only of dicts, lists and JSON scalars.
    """
    json_scalars = (str, int, float, bool, type(None))
    if isinstance(value, json_scalars):
        return value
    if isinstance(value, (list, tuple)):
        return [_make_serializable(item) for item in value]
    if isinstance(value, dict):
        # JSON object keys must be scalars as well: json.dumps raises
        # TypeError for keys such as dates or tuples, so stringify those
        # instead of only coercing the values.
        return {
            (key if isinstance(key, json_scalars) else str(key)):
                _make_serializable(item)
            for key, item in value.items()
        }
    # Fallback for every other type: use its string representation.
    return str(value)
|
||||
|
||||
|
||||
def _sanitize_text(text_content: str) -> str:
|
||||
"""Strip non-printable and invalid characters that break embedding tokenizers."""
|
||||
# Remove null bytes and control characters (keep newlines and tabs)
|
||||
@@ -174,8 +185,6 @@ async def fetch_obsidian_documents() -> list[Document]:
|
||||
parsed = obsidian_service.parse_markdown(content, md_path)
|
||||
|
||||
# Create LangChain Document with obsidian source
|
||||
document = Document(
|
||||
page_content=parsed["content"],
|
||||
metadata = {
|
||||
"source": "obsidian",
|
||||
"filepath": parsed["filepath"],
|
||||
@@ -187,7 +196,10 @@ async def fetch_obsidian_documents() -> list[Document]:
|
||||
for k, v in parsed["metadata"].items()
|
||||
if k not in ["created_at", "created_by"]
|
||||
},
|
||||
},
|
||||
}
|
||||
document = Document(
|
||||
page_content=parsed["content"],
|
||||
metadata=_make_serializable(metadata),
|
||||
)
|
||||
documents.append(document)
|
||||
|
||||
@@ -289,8 +301,6 @@ async def sync_obsidian_documents() -> dict[str, int]:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
parsed = obsidian_service.parse_markdown(content, filepath)
|
||||
document = Document(
|
||||
page_content=parsed["content"],
|
||||
metadata = {
|
||||
"source": "obsidian",
|
||||
"filepath": parsed["filepath"],
|
||||
@@ -302,7 +312,10 @@ async def sync_obsidian_documents() -> dict[str, int]:
|
||||
for k, v in parsed["metadata"].items()
|
||||
if k not in ["created_at", "created_by"]
|
||||
},
|
||||
},
|
||||
}
|
||||
document = Document(
|
||||
page_content=parsed["content"],
|
||||
metadata=_make_serializable(metadata),
|
||||
)
|
||||
documents.append(document)
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user