@@ -400,7 +400,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
400400 "exactly equal." , mamba_padding_pct )
401401
402402
403- class DeepseekV3ForCausalLM (VerifyAndUpdateConfig ):
403+ class DeepseekV32ForCausalLM (VerifyAndUpdateConfig ):
404404
405405 @classmethod
406406 def verify_and_update_config (cls , vllm_config : "VllmConfig" ) -> None :
@@ -409,20 +409,20 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
409409 """
410410 hf_config = vllm_config .model_config .hf_config
411411
412+ # Mirror the check in vllm/model_executor/models/deepseek_v2.py
412413 is_v32 = hasattr (hf_config , "index_topk" )
414+ assert is_v32
413415
414- if is_v32 :
415- # For DeepSeekV3.2, we use a custom fp8 format as default (i.e.
416- # "auto")
417- cache_config = vllm_config .cache_config
418- if cache_config .cache_dtype == "auto" or \
419- cache_config .cache_dtype .startswith ("fp8" ):
420- cache_config .cache_dtype = "fp8_ds_mla"
421- logger .info (
422- "Using custom fp8 kv-cache format for DeepSeekV3.2" )
423- if cache_config .cache_dtype == "bfloat16" :
424- cache_config .cache_dtype = "auto"
425- logger .info ("Using bfloat16 kv-cache for DeepSeekV3.2" )
416+ # For DeepSeekV3.2, the default kv-cache format is a custom fp8 layout:
417+ # both "auto" and any "fp8*" cache_dtype are mapped to "fp8_ds_mla".
418+ cache_config = vllm_config .cache_config
419+ if cache_config .cache_dtype == "auto" or \
420+ cache_config .cache_dtype .startswith ("fp8" ):
421+ cache_config .cache_dtype = "fp8_ds_mla"
422+ logger .info ("Using custom fp8 kv-cache format for DeepSeekV3.2" )
423+ if cache_config .cache_dtype == "bfloat16" :
424+ cache_config .cache_dtype = "auto"
425+ logger .info ("Using bfloat16 kv-cache for DeepSeekV3.2" )
426426
427427
428428MODELS_CONFIG_MAP : dict [str , type [VerifyAndUpdateConfig ]] = {
@@ -441,5 +441,5 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
441441 "MambaForCausalLM" : MambaModelConfig ,
442442 "Mamba2ForCausalLM" : MambaModelConfig ,
443443 "FalconMambaForCausalLM" : MambaModelConfig ,
444- "DeepseekV3ForCausalLM " : DeepseekV3ForCausalLM ,
444+ "DeepseekV32ForCausalLM " : DeepseekV32ForCausalLM ,
445445}
0 commit comments