diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 924f10d82b38..e73dc0c2be82 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -906,11 +906,13 @@ def _consolidate_qkv_weights(
 
     def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
         """Rename weights from ModelOpt llama4 fp8 checkpoints to vLLM format."""
-        if name.startswith("model."):
+        if name.startswith("model.") or name.startswith(
+                "language_model.model."):
+            renamed = name.replace("model.", "language_model.model.",
+                                   1) if name.startswith("model.") else name
             # Handle expert scale parameters with flat naming
             if "feed_forward.experts." in name and ("_input_scale" in name or
                                                     "_weight_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 # Map checkpoint naming to vLLM's expected naming
                 if "down_proj_input_scale" in renamed:
                     return renamed.replace("down_proj_input_scale",
@@ -929,7 +931,6 @@ def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
             # Handle attention scale parameters
             elif "self_attn." in name and (".k_scale" in name
                                            or ".v_scale" in name):
-                renamed = name.replace("model.", "language_model.model.", 1)
                 if ".k_proj.k_scale" in renamed:
                     return renamed.replace(".k_proj.k_scale", ".attn.k_scale")
                 elif ".v_proj.v_scale" in renamed:
@@ -937,7 +938,7 @@ def _rename_weight_for_modelopt_checkpoint(self, name: str) -> str:
                 return renamed
 
             # Standard model.* to language_model.model.* renaming
-            return name.replace("model.", "language_model.model.", 1)
+            return renamed
         elif name.startswith("lm_head.weight"):
             return name.replace("lm_head.weight",
                                 "language_model.lm_head.weight")
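
A minimal standalone sketch of the prefix handling above, for illustration only. The helper name rename_prefix and the sample keys are hypothetical; the real logic lives in _rename_weight_for_modelopt_checkpoint. The reason "renamed" is computed conditionally is that keys may now arrive either as bare "model.*" names or already prefixed with "language_model.model.", and a blind replace("model.", ..., 1) on an already-prefixed key would corrupt it, since the first "model." it finds sits inside "language_model.".

def rename_prefix(name: str) -> str:
    # Accept both bare "model." keys and keys that already carry the
    # "language_model.model." prefix; rewrite the former exactly once.
    if name.startswith("model.") or name.startswith("language_model.model."):
        return (name.replace("model.", "language_model.model.", 1)
                if name.startswith("model.") else name)
    return name

# Bare keys gain the prefix; already-prefixed keys pass through unchanged.
assert (rename_prefix("model.embed_tokens.weight")
        == "language_model.model.embed_tokens.weight")
assert (rename_prefix("language_model.model.embed_tokens.weight")
        == "language_model.model.embed_tokens.weight")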