Improve Obsidian RAG retrieval for large vaults

- Markdown-aware chunking (split on headers before size-based splitting)
- Prepend note filename to each chunk for self-contained context
- Source-filtered retrieval (obsidian/paperless queries stay isolated)
- MMR search with k=8, fetch_k=24 for better recall and diversity
- Add source metadata to Paperless docs and folder metadata to Obsidian docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-04 13:34:15 -04:00
parent 9bccac82f3
commit add9946bc2
2 changed files with 81 additions and 9 deletions
+3 -3
View File
@@ -121,7 +121,7 @@ async def simba_search(query: str):
Relevant information from Simba's documents
"""
print(f"[SIMBA SEARCH] Tool called with query: {query}")
- serialized, docs = await query_vector_store(query=query)
+ serialized, docs = await query_vector_store(query=query, source="paperless")
print(f"[SIMBA SEARCH] Found {len(docs)} documents")
print(f"[SIMBA SEARCH] Serialized result length: {len(serialized)}")
print(f"[SIMBA SEARCH] First 200 chars: {serialized[:200]}")
@@ -329,8 +329,8 @@ async def obsidian_search_notes(query: str) -> str:
return "Obsidian integration is not configured. Please set OBSIDIAN_VAULT_PATH environment variable."
try:
- # Query vector store for obsidian documents
- serialized, docs = await query_vector_store(query=query)
+ # Query vector store filtered to obsidian source only
+ serialized, docs = await query_vector_store(query=query, source="obsidian")
return serialized
except Exception as e: