@@ -55,8 +55,9 @@
 from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal
-from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model,
-                    maybe_prefix, merge_multimodal_embeddings)
+from .llama4 import Llama4ForCausalLM
+from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix,
+                    merge_multimodal_embeddings)
 from .vision import scatter_patch_features, select_patch_features
 
 logger = init_logger(__name__)
@@ -710,12 +711,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             self.config,
             None,
             prefix=maybe_prefix(prefix, "multi_modal_projector"))
-        self.language_model = init_vllm_registered_model(
-            vllm_config=vllm_config,
-            hf_config=config.text_config,
-            architectures=["Llama4ForCausalLM"],
-            prefix=maybe_prefix(prefix, "language_model"))
-
+        language_model_vllm_config = vllm_config.with_hf_config(
+            config.text_config, architectures=["Llama4ForCausalLM"])
+        self.language_model = Llama4ForCausalLM(
+            vllm_config=language_model_vllm_config,
+            prefix=maybe_prefix(prefix, "language_model"),
+        )
         self.tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
 
     def _parse_and_validate_image_input(
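
For context, here is a standalone sketch of the pattern the hunk above adopts: instead of resolving the text backbone through init_vllm_registered_model, the parent model derives a config scoped to the text sub-config and constructs the backbone class directly. The ToyConfig, ToyTextModel, and ToyMultiModalModel names below are made-up stand-ins, not vLLM classes.

# Illustrative sketch only -- the classes here are hypothetical stand-ins.
# It mirrors the change above: derive a child config pointed at the text
# sub-config, then construct the text backbone directly rather than going
# through a model registry lookup.
from dataclasses import dataclass, replace
from typing import Any, Dict, Tuple


@dataclass(frozen=True)
class ToyConfig:
    hf_config: Dict[str, Any]
    architectures: Tuple[str, ...]

    def with_hf_config(self, hf_config: Dict[str, Any],
                       architectures: Tuple[str, ...]) -> "ToyConfig":
        # Analogue of the with_hf_config call in the hunk: same wrapper
        # config, re-pointed at the text sub-config with a fixed
        # architecture list.
        return replace(self, hf_config=hf_config, architectures=architectures)


class ToyTextModel:
    def __init__(self, *, config: ToyConfig, prefix: str = "") -> None:
        self.config = config
        self.prefix = prefix


class ToyMultiModalModel:
    def __init__(self, *, config: ToyConfig, prefix: str = "") -> None:
        text_config = config.hf_config["text_config"]
        text_model_config = config.with_hf_config(
            text_config, architectures=("ToyTextModel",))
        # Direct construction, mirroring Llama4ForCausalLM(...) above.
        self.language_model = ToyTextModel(
            config=text_model_config,
            prefix=f"{prefix}language_model")


if __name__ == "__main__":
    cfg = ToyConfig(hf_config={"text_config": {"hidden_size": 16}},
                    architectures=("ToyMultiModalModel",))
    model = ToyMultiModalModel(config=cfg)
    print(model.language_model.config.architectures)  # ('ToyTextModel',)
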
@@ -857,9 +858,8 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
         # language_model is a Llama4ForCausalLM instance. We load its weights
         # using llama4's load_weights routine.
-        language_model_prefix = "language_model.model."
         language_model_weights, other_weights = self.separate_weights(
-            weights, prefix=language_model_prefix)
+            weights, prefix="language_model.model.")
         loader = AutoWeightsLoader(self)
         loaded_language_model_params = loader.load_weights(
             language_model_weights)
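
A hedged sketch of the prefix split used in the last hunk: weight names beginning with "language_model.model." are routed to the Llama4 backbone's loader, while everything else (vision encoder, projector, and so on) stays with the outer multimodal model. The separate_weights function and the weight names below are simplified assumptions, not vLLM's actual helper or checkpoint layout.

# Sketch only: an assumed, simplified stand-in for self.separate_weights(...)
# from the hunk above. It partitions (name, tensor) pairs by name prefix so
# the "language_model.model." weights can be handed to the Llama4 loader.
from typing import Iterable, List, Tuple


def separate_weights(
    weights: Iterable[Tuple[str, object]],
    prefix: str,
) -> Tuple[List[Tuple[str, object]], List[Tuple[str, object]]]:
    matched: List[Tuple[str, object]] = []
    other: List[Tuple[str, object]] = []
    for name, tensor in weights:
        (matched if name.startswith(prefix) else other).append((name, tensor))
    return matched, other


if __name__ == "__main__":
    dummy = [
        ("language_model.model.layers.0.mlp.gate_proj.weight", None),
        ("vision_model.patch_embedding.weight", None),
        ("multi_modal_projector.linear_1.weight", None),
    ]
    lm_weights, other_weights = separate_weights(
        dummy, prefix="language_model.model.")
    print(len(lm_weights), len(other_weights))  # 1 2
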