[BugFix] Disable fp8 kv-cache by default for DeepSeek V3.2 (vllm-project#27121)

LucasWilkinson · gemini-code-assist[bot] · 0xrushi · commit aa5a77c571c7 · 2025-10-25T23:00:00.000-04:00
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: 0xrushi <6279035+0xrushi@users.noreply.github.com>
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
@@ -481,12 +481,9 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         is_v32 = hasattr(hf_config, "index_topk")
         assert is_v32
 
-        # For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
-        #   "auto")
+        # For DeepSeekV3.2, a custom fp8 format is used when fp8 kv-cache is enabled.
         cache_config = vllm_config.cache_config
-        if cache_config.cache_dtype == "auto" or cache_config.cache_dtype.startswith(
-            "fp8"
-        ):
+        if cache_config.cache_dtype.startswith("fp8"):
             cache_config.cache_dtype = "fp8_ds_mla"
             logger.info("Using custom fp8 kv-cache format for DeepSeekV3.2")
         if cache_config.cache_dtype == "bfloat16":