From b2a2ef257bc948774336c323810706c4e7670f10 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 24 Apr 2025 17:09:01 +0200 Subject: [PATCH 1/7] Bump Transformers to 4.51.3 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/test.in | 2 +- requirements/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/test.in b/requirements/test.in index 3be580db0674..c5d2c4cd4c30 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -34,7 +34,7 @@ num2words # required for smolvlm test opencv-python-headless >= 4.11.0 # required for video test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.8 # required for model evaluation test -transformers==4.51.1 +transformers==4.51.3 tokenizers==0.21.1 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads. schemathesis>=3.39.15 # Required for openai schema test. diff --git a/requirements/test.txt b/requirements/test.txt index 6dcd4ff01460..9642a5bfe68d 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -737,7 +737,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.51.1 +transformers==4.51.3 # via # -r requirements/test.in # genai-perf From 3df8ed6f518d6fb33624880aaedd2a5a9e4265e3 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:13:25 +0200 Subject: [PATCH 2/7] Skip ChatGLM after 4.51.1 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index c15ae3619844..6b1ec64115e3 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -123,7 +123,8 @@ def check_available_online( "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B"), "BloomForCausalLM": _HfExamplesInfo("bigscience/bloomz-1b1"), "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b", - trust_remote_code=True), + trust_remote_code=True, + max_transformers_version="4.51.1"), "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3", # noqa: E501 trust_remote_code=True), "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01", From 5c3c30b7098cfade3ddafbe1db9029feeb75b7d5 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:10:35 +0200 Subject: [PATCH 3/7] Add `tiny` property to test registry Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index 6b1ec64115e3..9b7b9c3749ba 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -17,6 +17,15 @@ class _HfExamplesInfo: extras: Mapping[str, str] = field(default_factory=dict) """Extra models to use for testing this architecture.""" + arch: Optional[str] = None + """The architecture class name. + This is set in `HfExampleModels.__init__`.""" + + @property + def tiny(self) -> str: + """The tiny model to use for testing with this architecture.""" + return f"hf-tiny-model-private/tiny-random-{self.arch}" + tokenizer: Optional[str] = None """Set the tokenizer to load for this architecture.""" @@ -414,6 +423,9 @@ def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None: super().__init__() self.hf_models = hf_models + for arch, hf_model in hf_models.items(): + if hf_model.arch is None: + self.hf_models[arch].arch = arch def get_supported_archs(self) -> Set[str]: return self.hf_models.keys() From dbefac0b4ab095631a7a8b9f966948c4a68ebac1 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:14:36 +0200 Subject: [PATCH 4/7] Use tiny models from test registry Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../decoder_only/language/test_models.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py index 79fa3fa99773..e1f51e7608a5 100644 --- a/tests/models/decoder_only/language/test_models.py +++ b/tests/models/decoder_only/language/test_models.py @@ -9,6 +9,7 @@ from vllm.platforms import current_platform +from ...registry import HF_EXAMPLE_MODELS from ...utils import check_logprobs_close # These have unsupported head_dim for FA. We do not @@ -33,54 +34,50 @@ # @maybe_test_rocm_aiter @pytest.mark.parametrize( - "model", + "model_arch", [ pytest.param( - "bigscience/bloom-560m", # bloom - testing alibi slopes + "BloomForCausalLM", # testing alibi slopes marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), pytest.param( - "openai-community/gpt2", # gpt2 + "GPT2LMHeadModel", # gpt2 marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), - pytest.param("Milos/slovak-gpt-j-405M"), # gptj - pytest.param("bigcode/tiny_starcoder_py"), # gpt_bigcode - pytest.param("EleutherAI/pythia-70m"), # gpt_neox + pytest.param("GPTJForCausalLM"), + pytest.param("GPTBigCodeForCausalLM"), + pytest.param("GPTNeoXForCausalLM"), pytest.param( - "google/gemma-1.1-2b-it", # gemma + "GemmaForCausalLM", # gemma marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), + pytest.param("GlmForCausalLM"), pytest.param( - "THUDM/chatglm3-6b", # chatglm (text-only) - ), - pytest.param( - "meta-llama/Llama-3.2-1B-Instruct", # llama + "LlamaForCausalLM", marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), pytest.param( - "openbmb/MiniCPM3-4B", + "MiniCPM3ForCausalLM", # fused_moe not supported on CPU marks=[pytest.mark.core_model], ), pytest.param( - "facebook/opt-125m", # opt + "OPTForCausalLM", marks=[pytest.mark.core_model, pytest.mark.cpu_model], ), pytest.param( - "microsoft/phi-2", # phi + "PhiForCausalLM", marks=[pytest.mark.core_model], ), + pytest.param("QWenLMHeadModel", ), pytest.param( - "Qwen/Qwen-7B", # qwen (text-only) - ), - pytest.param( - "Qwen/Qwen2.5-0.5B-Instruct", # qwen2 + "Qwen2ForCausalLM", marks=[pytest.mark.core_model], ), - pytest.param("stabilityai/stablelm-3b-4e1t"), # stablelm - pytest.param("bigcode/starcoder2-3b"), # starcoder2 + pytest.param("StableLmForCausalLM"), + pytest.param("Starcoder2ForCausalLM"), pytest.param( - "ehristoforu/Falcon3-MoE-2x7B-Insruct", # mixtral + "MixtralForCausalLM", marks=[pytest.mark.cpu_model], ) ]) @@ -89,10 +86,13 @@ @pytest.mark.parametrize("num_logprobs", [5]) @pytest.mark.parametrize( "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False]) -def test_models(hf_runner, vllm_runner, example_prompts, model: str, +def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str, dtype: str, max_tokens: int, num_logprobs: int, use_rocm_aiter: bool, monkeypatch) -> None: + model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch) + model = model_info.tiny + if model in REQUIRES_V0: monkeypatch.setenv("VLLM_USE_V1", "0") From 7b09ab6cfa2bf0b382f04cb2d28e96e85e7cb7a9 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:33:22 +0200 Subject: [PATCH 5/7] Make it work with frozen dataclasses Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 9b7b9c3749ba..12680bfabf09 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 from collections.abc import Mapping, Set -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import Any, Literal, Optional import pytest @@ -18,12 +18,13 @@ class _HfExamplesInfo: """Extra models to use for testing this architecture.""" arch: Optional[str] = None - """The architecture class name. - This is set in `HfExampleModels.__init__`.""" + """The architecture class name.""" @property def tiny(self) -> str: """The tiny model to use for testing with this architecture.""" + if self.arch is None: + raise ValueError("Architecture name is not set.") return f"hf-tiny-model-private/tiny-random-{self.arch}" tokenizer: Optional[str] = None @@ -417,15 +418,17 @@ def check_available_online( **_TRANSFORMERS_MODELS, } +# Add arch here so it doesn't have to be duplicated in the definitions above +_EXAMPLE_MODELS = { + k: _HfExamplesInfo(arch=k, **asdict(v)) for k, v in _EXAMPLE_MODELS.items() +} + class HfExampleModels: def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None: super().__init__() self.hf_models = hf_models - for arch, hf_model in hf_models.items(): - if hf_model.arch is None: - self.hf_models[arch].arch = arch def get_supported_archs(self) -> Set[str]: return self.hf_models.keys() From 5fe0d00ccd7a5287311bd873b31c65e42e7949ec Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 12:48:26 +0200 Subject: [PATCH 6/7] Use correct org to get tiny models Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 12680bfabf09..456965e342e0 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -25,7 +25,7 @@ def tiny(self) -> str: """The tiny model to use for testing with this architecture.""" if self.arch is None: raise ValueError("Architecture name is not set.") - return f"hf-tiny-model-private/tiny-random-{self.arch}" + return f"hf-internal-testing/tiny-random-{self.arch}" tokenizer: Optional[str] = None """Set the tokenizer to load for this architecture.""" From 2993dd02279d68633232877789d6d0d4f14e6b5f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 25 Apr 2025 15:06:53 +0200 Subject: [PATCH 7/7] Remove tiny models as they're incompatible with vLLM Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../models/decoder_only/language/test_models.py | 3 +-- tests/models/registry.py | 17 +---------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/decoder_only/language/test_models.py index e1f51e7608a5..85714b85e7eb 100644 --- a/tests/models/decoder_only/language/test_models.py +++ b/tests/models/decoder_only/language/test_models.py @@ -90,8 +90,7 @@ def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str, dtype: str, max_tokens: int, num_logprobs: int, use_rocm_aiter: bool, monkeypatch) -> None: - model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch) - model = model_info.tiny + model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default if model in REQUIRES_V0: monkeypatch.setenv("VLLM_USE_V1", "0") diff --git a/tests/models/registry.py b/tests/models/registry.py index 456965e342e0..6b1ec64115e3 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 from collections.abc import Mapping, Set -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field from typing import Any, Literal, Optional import pytest @@ -17,16 +17,6 @@ class _HfExamplesInfo: extras: Mapping[str, str] = field(default_factory=dict) """Extra models to use for testing this architecture.""" - arch: Optional[str] = None - """The architecture class name.""" - - @property - def tiny(self) -> str: - """The tiny model to use for testing with this architecture.""" - if self.arch is None: - raise ValueError("Architecture name is not set.") - return f"hf-internal-testing/tiny-random-{self.arch}" - tokenizer: Optional[str] = None """Set the tokenizer to load for this architecture.""" @@ -418,11 +408,6 @@ def check_available_online( **_TRANSFORMERS_MODELS, } -# Add arch here so it doesn't have to be duplicated in the definitions above -_EXAMPLE_MODELS = { - k: _HfExamplesInfo(arch=k, **asdict(v)) for k, v in _EXAMPLE_MODELS.items() -} - class HfExampleModels: def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None: