Improve Obsidian RAG retrieval for large vaults

- Markdown-aware chunking (split on headers before size-based splitting)
- Prepend note filename to each chunk for self-contained context
- Source-filtered retrieval (obsidian/paperless queries stay isolated)
- MMR search with k=8, fetch_k=24 for better recall and diversity
- Add source metadata to Paperless docs and folder metadata to Obsidian docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-04 13:34:15 -04:00
parent 9bccac82f3
commit add9946bc2
2 changed files with 81 additions and 9 deletions
@@ -121,7 +121,7 @@ async def simba_search(query: str):
        Relevant information from Simba's documents
    """
    print(f"[SIMBA SEARCH] Tool called with query: {query}")
-    serialized, docs = await query_vector_store(query=query)
+    serialized, docs = await query_vector_store(query=query, source="paperless")
    print(f"[SIMBA SEARCH] Found {len(docs)} documents")
    print(f"[SIMBA SEARCH] Serialized result length: {len(serialized)}")
    print(f"[SIMBA SEARCH] First 200 chars: {serialized[:200]}")
@@ -329,8 +329,8 @@ async def obsidian_search_notes(query: str) -> str:
        return "Obsidian integration is not configured. Please set OBSIDIAN_VAULT_PATH environment variable."

    try:
-        # Query vector store for obsidian documents
-        serialized, docs = await query_vector_store(query=query)
+        # Query vector store filtered to obsidian source only
+        serialized, docs = await query_vector_store(query=query, source="obsidian")
        return serialized

    except Exception as e: