From fdc4236f2f438e5be07a17a4daeb56c3fa84874d Mon Sep 17 00:00:00 2001
From: Lu Fang <fanglu@fb.com>
Date: Tue, 15 Apr 2025 16:35:58 -0700
Subject: [PATCH] fix pp for llama4

Signed-off-by: Lu Fang <fanglu@fb.com>
---
 vllm/model_executor/models/mllama4.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 0966f546ddf9..69e3ea8bd063 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -672,9 +672,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.config,
             None,
             prefix=maybe_prefix(prefix, "multi_modal_projector"))
-
         self.language_model = _initialize_model(
-            vllm_config=vllm_config.with_hf_config(config.text_config),
+            vllm_config=vllm_config.with_hf_config(config.text_config,
+                                                   ["LlamaForCausalLM"]),
             prefix=maybe_prefix(prefix, "language_model"),
             model_class=Llama4ForCausalLM,
         )
@@ -824,7 +824,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
         # language_model is an Llama4ForCausalLM instance. We load it's
         # using llama4's load_weights routine.
         language_model_weights, other_weights = self.separate_weights(
-            weights, prefix="language_model.model.")
+            weights, prefix="language_model.")
         loader = AutoWeightsLoader(self)
         loaded_language_model_params = loader.load_weights(
             language_model_weights)