Improve Obsidian RAG retrieval for large vaults
- Markdown-aware chunking (split on headers before size-based splitting)
- Prepend note filename to each chunk for self-contained context
- Source-filtered retrieval (obsidian/paperless queries stay isolated)
- MMR search with k=8, fetch_k=24 for better recall and diversity
- Add source metadata to Paperless docs and folder metadata to Obsidian docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -121,7 +121,7 @@ async def simba_search(query: str):
|
||||
Relevant information from Simba's documents
|
||||
"""
|
||||
print(f"[SIMBA SEARCH] Tool called with query: {query}")
|
||||
serialized, docs = await query_vector_store(query=query)
|
||||
serialized, docs = await query_vector_store(query=query, source="paperless")
|
||||
print(f"[SIMBA SEARCH] Found {len(docs)} documents")
|
||||
print(f"[SIMBA SEARCH] Serialized result length: {len(serialized)}")
|
||||
print(f"[SIMBA SEARCH] First 200 chars: {serialized[:200]}")
|
||||
@@ -329,8 +329,8 @@ async def obsidian_search_notes(query: str) -> str:
|
||||
return "Obsidian integration is not configured. Please set OBSIDIAN_VAULT_PATH environment variable."
|
||||
|
||||
try:
|
||||
# Query vector store for obsidian documents
|
||||
serialized, docs = await query_vector_store(query=query)
|
||||
# Query vector store filtered to obsidian source only
|
||||
serialized, docs = await query_vector_store(query=query, source="obsidian")
|
||||
return serialized
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user