
Commit 6ad393f

yeqcharlotte authored and houseroad committed

clean up model names and whitespaces (#120)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
Signed-off-by: Lu Fang <lufang@fb.com>
1 parent 188bb52 · commit 6ad393f

File tree

8 files changed: +8 −18 lines


examples/offline_inference/vision_language.py

Lines changed: 1 addition & 2 deletions

@@ -585,8 +585,7 @@ def run_mllama(questions: list[str], modality: str) -> ModelRequestData:
 def run_llama4(questions: list[str], modality: str):
     assert modality == "image"
 
-    # FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-    model_name = "ll-re/Llama-4-Scout-17B-16E-Instruct"
+    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
 
     engine_args = EngineArgs(
         model=model_name,
examples/offline_inference/vision_language_multi_image.py

Lines changed: 1 addition & 2 deletions

@@ -254,8 +254,7 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
 
 
 def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
-    # FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-    model_name = "ll-re/Llama-4-Scout-17B-16E-Instruct"
+    model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
 
     engine_args = EngineArgs(
         model=model_name,

tests/models/decoder_only/vision_language/test_models.py

Lines changed: 1 addition & 2 deletions

@@ -537,8 +537,7 @@
     )],
     ),
     "llama4": VLMTestInfo(
-        # FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-        models=["ll-re/Llama-4-Scout-17B-16E-Instruct"],
+        models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
         prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
         img_idx_to_prompt=lambda _: "<|image|>",
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),

tests/models/multimodal/processing/test_common.py

Lines changed: 1 addition & 2 deletions

@@ -280,8 +280,7 @@ def _test_processing_correctness_mistral(
     "Skywork/Skywork-R1V-38B",
     "fixie-ai/ultravox-v0_5-llama-3_2-1b",
     "openai/whisper-large-v3",
-    # FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-    "ll-re/Llama-4-Scout-17B-16E-Instruct",
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 ])
 @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
 @pytest.mark.parametrize("num_batches", [32])

tests/models/multimodal/processing/test_llama4.py

Lines changed: 2 additions & 2 deletions

@@ -10,8 +10,8 @@
 from ...utils import build_model_context
 
 
-# FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-@pytest.mark.parametrize("model_id", ["ll-re/Llama-4-Scout-17B-16E-Instruct"])
+@pytest.mark.parametrize("model_id",
+                         ["meta-llama/Llama-4-Scout-17B-16E-Instruct"])
 @pytest.mark.parametrize("mm_processor_kwargs", [{}])
 @pytest.mark.parametrize("num_imgs", [1, 5])
 @pytest.mark.parametrize("disable_mm_preprocessor_cache", [True, False])
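
As an aside, stacked `pytest.mark.parametrize` decorators multiply out, so the test above runs 1 model × 1 kwargs dict × 2 image counts × 2 cache settings = 4 cases. A tiny, hypothetical illustration of the mechanism (not from the repo):

import pytest


@pytest.mark.parametrize("num_imgs", [1, 5])
@pytest.mark.parametrize("disable_mm_preprocessor_cache", [True, False])
def test_cartesian_product(num_imgs: int,
                           disable_mm_preprocessor_cache: bool) -> None:
    # pytest expands the stacked decorators into 2 x 2 = 4 test cases.
    assert num_imgs in (1, 5)
    assert isinstance(disable_mm_preprocessor_cache, bool)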

tests/models/registry.py

Lines changed: 1 addition & 2 deletions

@@ -337,8 +337,7 @@ def check_available_online(
                                           tokenizer="facebook/bart-base",
                                           trust_remote_code=True),  # noqa: E501
     "MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"),  # noqa: E501
-    # FIXME: meta-llama/Llama-4-Scout-17B-16E-Instruct
-    "Llama4ForConditionalGeneration": _HfExamplesInfo("ll-re/Llama-4-Scout-17B-16E-Instruct"),  # noqa: E501
+    "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct"),  # noqa: E501
     "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"),  # noqa: E501
 }
vllm/model_executor/layers/fused_moe/cutlass_moe.py

Lines changed: 1 addition & 1 deletion

@@ -103,7 +103,7 @@ def cutlass_moe_fp8(
     per_act_token = a1_scale.numel() != 1 if a1_scale is not None else (
         a2_scale.numel() != 1 if a2_scale is not None else False)
     if apply_router_weight_on_input:
-        # FIXME: this only works for topK=1, will need to update for topK>1
+        # TODO: this only works for topK=1, will need to update for topK>1
        a = a * topk_weights.to(out_dtype)
 
     a_q, a1_scale = ops.scaled_fp8_quant(
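
The retagged comment flags a real constraint: folding the router weight into the activations before quantization is only equivalent to weighting the outputs when each token routes to a single expert. A standalone sketch of why (assumed shapes, not the vLLM kernel):

import torch

num_tokens, hidden, topk = 4, 8, 1
a = torch.randn(num_tokens, hidden)          # token activations
topk_weights = torch.rand(num_tokens, topk)  # one router weight per choice

if topk == 1:
    # Each token goes to exactly one expert, so scaling its input row by
    # the single routing weight equals scaling that expert's output.
    a = a * topk_weights
else:
    # With topk > 1 the same row feeds several experts with different
    # weights, so the weighting must happen on the per-expert outputs:
    #   out = sum_k topk_weights[:, k] * expert_k(a)
    raise NotImplementedError("apply_router_weight_on_input needs topk == 1")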

vllm/model_executor/models/mllama4.py

Lines changed: 0 additions & 5 deletions

@@ -310,9 +310,7 @@ def forward(
     ):
         # Self Attention
         residual = hidden_state
-
         hidden_state = self.input_layernorm(hidden_state)
-
         hidden_state = self.self_attn(hidden_state)
         hidden_state = residual + hidden_state
 
@@ -463,15 +461,12 @@ def forward(
         positional_embedding = self.positional_embedding_vlm.to(
             dtype=hidden_state.dtype, device=hidden_state.device)
         hidden_state = hidden_state + positional_embedding
-
         hidden_state = self.layernorm_pre(hidden_state)
-
         hidden_state = hidden_state.view(num_tiles, -1, hidden_dim)
 
         # Apply encoder
         output = self.model(hidden_state)
         hidden_state = output.last_hidden_state
-
         hidden_state = self.layernorm_post(hidden_state)
 
         # Remove CLS token output
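
These whitespace-only edits touch a standard pre-norm residual attention block; for readers unfamiliar with the pattern, a generic sketch (assumed module names, not vLLM's actual classes):

import torch
import torch.nn as nn


class PreNormSelfAttentionBlock(nn.Module):
    """Generic pre-norm residual block, mirroring the touched forward()."""

    def __init__(self, hidden_dim: int, num_heads: int) -> None:
        super().__init__()
        self.input_layernorm = nn.LayerNorm(hidden_dim)
        self.self_attn = nn.MultiheadAttention(hidden_dim, num_heads,
                                               batch_first=True)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        # Self Attention: normalize first, then add back the residual.
        residual = hidden_state
        hidden_state = self.input_layernorm(hidden_state)
        hidden_state, _ = self.self_attn(hidden_state, hidden_state,
                                         hidden_state)
        return residual + hidden_state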
