From 9629bfcef4a68e099edee777ffdf45eae292373b Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Mon, 11 May 2026 23:42:23 -0400 Subject: [PATCH] Fix embedding tokenizer mismatch with custom embedding server Disable tiktoken pre-encoding for custom embedding servers by setting check_embedding_ctx_length=False. LangChain was encoding text into OpenAI token IDs and then sending them to llama-server, which has a different vocabulary, causing "invalid tokens" errors. Co-Authored-By: Claude Opus 4.6 --- blueprints/rag/logic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/blueprints/rag/logic.py b/blueprints/rag/logic.py index cff8393..a581f60 100644 --- a/blueprints/rag/logic.py +++ b/blueprints/rag/logic.py @@ -26,6 +26,7 @@ if _embedding_server_url: model=_embedding_model, base_url=_embedding_server_url, api_key="not-needed", + check_embedding_ctx_length=False, ) else: embeddings = OpenAIEmbeddings(model=_embedding_model)