llm: disable fast decoding path for mistral

intel · Dec 5, 2024 · 62920f6 · 62920f6
1 parent 76aa494
commit 62920f6
Showing 1 changed file with 9 additions and 0 deletions.
diff --git a/service/llm_biz.py b/service/llm_biz.py
@@ -27,6 +27,15 @@
 import model_config
 
 
+import ipex_llm.transformers.models.mistral
+
+# Workaround for https://github.com/intel/AI-Playground/issues/94
+# Make use_decoding_fast_path() always return False to avoid calling forward_qkv(), which is not supported by bigdl-core-xe-*-23
+ipex_llm.transformers.models.mistral.use_decoding_fast_path = (
+    lambda *args, **kwargs: False
+)
+
+
 class LLMParams:
     prompt: List[Dict[str, str]]
     device: int