[Bugfix] fix pp for llama4 #16746
```diff
@@ -672,9 +672,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.config,
             None,
             prefix=maybe_prefix(prefix, "multi_modal_projector"))
 
         self.language_model = _initialize_model(
-            vllm_config=vllm_config.with_hf_config(config.text_config),
+            vllm_config=vllm_config.with_hf_config(config.text_config,
+                                                   ["LlamaForCausalLM"]),
             prefix=maybe_prefix(prefix, "language_model"),
             model_class=Llama4ForCausalLM,
         )
@@ -824,7 +824,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
         # language_model is a Llama4ForCausalLM instance. We load its
         # weights using llama4's load_weights routine.
         language_model_weights, other_weights = self.separate_weights(
-            weights, prefix="language_model.model.")
+            weights, prefix="language_model.")
         loader = AutoWeightsLoader(self)
         loaded_language_model_params = loader.load_weights(
             language_model_weights)
```

Review comment: Wondering why this issue was not triggered before?

Reply: `language_model.lm_head` can also be loaded by the parent model's weight loader when PP is not enabled. With PP, we split the weights into two parts, so `lm_head` is missing on PP rank 0 and loading raises a "weight not found" error. The model-loading logic in llama4.py, by contrast, handles `is_pp_missing_parameter` to avoid that exception.
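To see why the prefix change matters, here is a minimal sketch of a prefix-based split in the spirit of `separate_weights` (the helper below, `split_by_prefix`, is illustrative only, not vLLM's actual implementation):

```python
from typing import Iterable, List, Tuple

import torch


def split_by_prefix(
    weights: Iterable[Tuple[str, torch.Tensor]],
    prefix: str,
) -> Tuple[List[Tuple[str, torch.Tensor]], List[Tuple[str, torch.Tensor]]]:
    """Split checkpoint entries into (matching, other) by name prefix."""
    matching: List[Tuple[str, torch.Tensor]] = []
    other: List[Tuple[str, torch.Tensor]] = []
    for name, tensor in weights:
        (matching if name.startswith(prefix) else other).append((name, tensor))
    return matching, other
```

With the old prefix `"language_model.model."`, a checkpoint entry like `"language_model.lm_head.weight"` falls into `other` and goes through the parent loader; with `"language_model."` it is routed to the llama4 loader together with the rest of the language model.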
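Routing `lm_head` to the llama4 loader helps because of the `is_pp_missing_parameter` handling mentioned in the reply. A hedged sketch of that style of loading loop (`is_pp_missing_parameter` does exist in vllm/model_executor/models/utils.py, but the surrounding loop here is a simplified approximation, not the exact vLLM code):

```python
from typing import Iterable, Set, Tuple

import torch
from torch import nn

from vllm.model_executor.models.utils import is_pp_missing_parameter


def load_weights_pp_aware(
        model: nn.Module,
        weights: Iterable[Tuple[str, torch.Tensor]]) -> Set[str]:
    """Simplified llama4-style loop that tolerates PP-sharded params."""
    params_dict = dict(model.named_parameters())
    loaded: Set[str] = set()
    for name, loaded_weight in weights:
        # Parameters such as lm_head exist only on the last PP rank;
        # skip them elsewhere instead of failing with "weight not found".
        if is_pp_missing_parameter(name, model):
            continue
        param = params_dict[name]
        weight_loader = getattr(param, "weight_loader",
                                lambda p, w: p.data.copy_(w))
        weight_loader(param, loaded_weight)
        loaded.add(name)
    return loaded
```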
Review comment: Should we use `Llama4ForCausalLM`?
Reply: `Llama4ForCausalLM` is not a registered architecture; we should avoid relying on it here, since that would require adding a lot of hacks, as in the initial PR.
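One way to check which names are registered (assuming vLLM's `ModelRegistry.get_supported_archs()` API; hedged sketch, the exact import path may vary by version):

```python
from vllm import ModelRegistry

archs = ModelRegistry.get_supported_archs()
print("LlamaForCausalLM" in archs)    # expected True: registered architecture
print("Llama4ForCausalLM" in archs)   # expected False: hence the override
```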
Review comment: Probably a dumb question, but since `_initialize_model` is already pointing to `model_class=Llama4ForCausalLM`, why do we need to override the `architectures` here to `LlamaForCausalLM`? (See vllm/model_executor/model_loader/loader.py, lines 114 to 123 at 8cac35b.)
Reply: It happens during `__post_init__` of the hf config, when we call `replace` inside the `with_hf_config` function: the architectures on the new hf config are resolved against the model registry there, so they must name a registered architecture. (See vllm/config.py, line 3781 at a018e55; vllm/config.py, line 3790 at a018e55; vllm/model_executor/models/registry.py, lines 441 to 442 at a018e55.)
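As a toy reconstruction of the mechanism (not vLLM's actual classes): `dataclasses.replace` builds a new instance and re-runs `__post_init__`, so the architecture lookup there sees whatever `architectures` list `with_hf_config` passed in.

```python
from dataclasses import dataclass, field, replace

# Stand-in for the model registry referenced above; only names present
# here would resolve successfully.
_REGISTRY = {"LlamaForCausalLM"}


@dataclass
class ToyModelConfig:
    architectures: list = field(default_factory=lambda: ["LlamaForCausalLM"])

    def __post_init__(self):
        # Runs on construction AND again whenever replace() copies the
        # config, which is why with_hf_config must pass a registered name.
        for arch in self.architectures:
            if arch not in _REGISTRY:
                raise ValueError(f"{arch!r} is not a registered architecture")


cfg = ToyModelConfig()
cfg2 = replace(cfg, architectures=["LlamaForCausalLM"])   # fine
# replace(cfg, architectures=["Llama4ForCausalLM"])       # would raise
```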