From b2a2ef257bc948774336c323810706c4e7670f10 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Thu, 24 Apr 2025 17:09:01 +0200
Subject: [PATCH 1/7] Bump Transformers to 4.51.3

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 requirements/test.in  | 2 +-
 requirements/test.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements/test.in b/requirements/test.in
index 3be580db0674..c5d2c4cd4c30 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -34,7 +34,7 @@ num2words # required for smolvlm test
 opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.8 # required for model evaluation test
-transformers==4.51.1
+transformers==4.51.3
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0  # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.
diff --git a/requirements/test.txt b/requirements/test.txt
index 6dcd4ff01460..9642a5bfe68d 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -737,7 +737,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.51.1
+transformers==4.51.3
     # via
     #   -r requirements/test.in
     #   genai-perf

From 3df8ed6f518d6fb33624880aaedd2a5a9e4265e3 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:13:25 +0200
Subject: [PATCH 2/7] Skip ChatGLM on Transformers versions after 4.51.1

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/models/registry.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index c15ae3619844..6b1ec64115e3 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -123,7 +123,8 @@ def check_available_online(
     "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B"),
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloomz-1b1"),
     "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
-                                    trust_remote_code=True),
+                                    trust_remote_code=True,
+                                    max_transformers_version="4.51.1"),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
                                                        trust_remote_code=True),
     "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01",

From 5c3c30b7098cfade3ddafbe1db9029feeb75b7d5 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:10:35 +0200
Subject: [PATCH 3/7] Add `tiny` property to test registry

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/models/registry.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index 6b1ec64115e3..9b7b9c3749ba 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -17,6 +17,15 @@ class _HfExamplesInfo:
     extras: Mapping[str, str] = field(default_factory=dict)
     """Extra models to use for testing this architecture."""
 
+    arch: Optional[str] = None
+    """The architecture class name.
+    This is set in `HfExampleModels.__init__`."""
+
+    @property
+    def tiny(self) -> str:
+        """The tiny model to use for testing with this architecture."""
+        return f"hf-tiny-model-private/tiny-random-{self.arch}"
+
     tokenizer: Optional[str] = None
     """Set the tokenizer to load for this architecture."""
 
@@ -414,6 +423,9 @@ def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
         super().__init__()
 
         self.hf_models = hf_models
+        for arch, hf_model in hf_models.items():
+            if hf_model.arch is None:
+                self.hf_models[arch].arch = arch
 
     def get_supported_archs(self) -> Set[str]:
         return self.hf_models.keys()

From dbefac0b4ab095631a7a8b9f966948c4a68ebac1 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:14:36 +0200
Subject: [PATCH 4/7] Use tiny models from test registry

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .../decoder_only/language/test_models.py      | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py
index 79fa3fa99773..e1f51e7608a5 100644
--- a/tests/models/decoder_only/language/test_models.py
+++ b/tests/models/decoder_only/language/test_models.py
@@ -9,6 +9,7 @@
 
 from vllm.platforms import current_platform
 
+from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close
 
 # These have unsupported head_dim for FA. We do not
@@ -33,54 +34,50 @@
 
 # @maybe_test_rocm_aiter
 @pytest.mark.parametrize(
-    "model",
+    "model_arch",
     [
         pytest.param(
-            "bigscience/bloom-560m",  # bloom - testing alibi slopes
+            "BloomForCausalLM",  # testing alibi slopes
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openai-community/gpt2",  # gpt2
+            "GPT2LMHeadModel",  # gpt2
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
-        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
-        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+        pytest.param("GPTJForCausalLM"),
+        pytest.param("GPTBigCodeForCausalLM"),
+        pytest.param("GPTNeoXForCausalLM"),
         pytest.param(
-            "google/gemma-1.1-2b-it",  # gemma
+            "GemmaForCausalLM",  # gemma
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
+        pytest.param("GlmForCausalLM"),
         pytest.param(
-            "THUDM/chatglm3-6b",  # chatglm (text-only)
-        ),
-        pytest.param(
-            "meta-llama/Llama-3.2-1B-Instruct",  # llama
+            "LlamaForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openbmb/MiniCPM3-4B",
+            "MiniCPM3ForCausalLM",
             # fused_moe not supported on CPU
             marks=[pytest.mark.core_model],
         ),
         pytest.param(
-            "facebook/opt-125m",  # opt
+            "OPTForCausalLM",
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "microsoft/phi-2",  # phi
+            "PhiForCausalLM",
             marks=[pytest.mark.core_model],
         ),
+        pytest.param("QWenLMHeadModel", ),
         pytest.param(
-            "Qwen/Qwen-7B",  # qwen (text-only)
-        ),
-        pytest.param(
-            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+            "Qwen2ForCausalLM",
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
-        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+        pytest.param("StableLmForCausalLM"),
+        pytest.param("Starcoder2ForCausalLM"),
         pytest.param(
-            "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
+            "MixtralForCausalLM",
             marks=[pytest.mark.cpu_model],
         )
     ])
@@ -89,10 +86,13 @@
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
-def test_models(hf_runner, vllm_runner, example_prompts, model: str,
+def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
                 dtype: str, max_tokens: int, num_logprobs: int,
                 use_rocm_aiter: bool, monkeypatch) -> None:
 
+    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
+    model = model_info.tiny
+
     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
 

From 7b09ab6cfa2bf0b382f04cb2d28e96e85e7cb7a9 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:33:22 +0200
Subject: [PATCH 5/7] Make setting `arch` work with frozen dataclasses

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/models/registry.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index 9b7b9c3749ba..12680bfabf09 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from collections.abc import Mapping, Set
-from dataclasses import dataclass, field
+from dataclasses import asdict, dataclass, field
 from typing import Any, Literal, Optional
 
 import pytest
@@ -18,12 +18,13 @@ class _HfExamplesInfo:
     """Extra models to use for testing this architecture."""
 
     arch: Optional[str] = None
-    """The architecture class name.
-    This is set in `HfExampleModels.__init__`."""
+    """The architecture class name."""
 
     @property
     def tiny(self) -> str:
         """The tiny model to use for testing with this architecture."""
+        if self.arch is None:
+            raise ValueError("Architecture name is not set.")
         return f"hf-tiny-model-private/tiny-random-{self.arch}"
 
     tokenizer: Optional[str] = None
@@ -417,15 +418,17 @@ def check_available_online(
     **_TRANSFORMERS_MODELS,
 }
 
+# Add arch here so it doesn't have to be duplicated in the definitions above
+_EXAMPLE_MODELS = {
+    k: _HfExamplesInfo(arch=k, **asdict(v)) for k, v in _EXAMPLE_MODELS.items()
+}
+
 
 class HfExampleModels:
     def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
         super().__init__()
 
         self.hf_models = hf_models
-        for arch, hf_model in hf_models.items():
-            if hf_model.arch is None:
-                self.hf_models[arch].arch = arch
 
     def get_supported_archs(self) -> Set[str]:
         return self.hf_models.keys()

From 5fe0d00ccd7a5287311bd873b31c65e42e7949ec Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:48:26 +0200
Subject: [PATCH 6/7] Use correct org to get tiny models

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 tests/models/registry.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/registry.py b/tests/models/registry.py
index 12680bfabf09..456965e342e0 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -25,7 +25,7 @@ def tiny(self) -> str:
         """The tiny model to use for testing with this architecture."""
         if self.arch is None:
             raise ValueError("Architecture name is not set.")
-        return f"hf-tiny-model-private/tiny-random-{self.arch}"
+        return f"hf-internal-testing/tiny-random-{self.arch}"
 
     tokenizer: Optional[str] = None
     """Set the tokenizer to load for this architecture."""

From 2993dd02279d68633232877789d6d0d4f14e6b5f Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 25 Apr 2025 15:06:53 +0200
Subject: [PATCH 7/7] Remove tiny models as they're incompatible with vLLM

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .../models/decoder_only/language/test_models.py |  3 +--
 tests/models/registry.py                        | 17 +----------------
 2 files changed, 2 insertions(+), 18 deletions(-)

diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py
index e1f51e7608a5..85714b85e7eb 100644
--- a/tests/models/decoder_only/language/test_models.py
+++ b/tests/models/decoder_only/language/test_models.py
@@ -90,8 +90,7 @@ def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
                 dtype: str, max_tokens: int, num_logprobs: int,
                 use_rocm_aiter: bool, monkeypatch) -> None:
 
-    model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
-    model = model_info.tiny
+    model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default
 
     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 456965e342e0..6b1ec64115e3 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from collections.abc import Mapping, Set
-from dataclasses import asdict, dataclass, field
+from dataclasses import dataclass, field
 from typing import Any, Literal, Optional
 
 import pytest
@@ -17,16 +17,6 @@ class _HfExamplesInfo:
     extras: Mapping[str, str] = field(default_factory=dict)
     """Extra models to use for testing this architecture."""
 
-    arch: Optional[str] = None
-    """The architecture class name."""
-
-    @property
-    def tiny(self) -> str:
-        """The tiny model to use for testing with this architecture."""
-        if self.arch is None:
-            raise ValueError("Architecture name is not set.")
-        return f"hf-internal-testing/tiny-random-{self.arch}"
-
     tokenizer: Optional[str] = None
     """Set the tokenizer to load for this architecture."""
 
@@ -418,11 +408,6 @@ def check_available_online(
     **_TRANSFORMERS_MODELS,
 }
 
-# Add arch here so it doesn't have to be duplicated in the definitions above
-_EXAMPLE_MODELS = {
-    k: _HfExamplesInfo(arch=k, **asdict(v)) for k, v in _EXAMPLE_MODELS.items()
-}
-
 
 class HfExampleModels:
     def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None: