diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py
index 51283dc630c2..2dd35c415158 100644
--- a/tests/models/language/pooling/test_embedding.py
+++ b/tests/models/language/pooling/test_embedding.py
@@ -7,7 +7,7 @@
 from vllm.config import PoolerConfig
 from vllm.platforms import current_platform
 
-from ...utils import check_embeddings_close
+from ...utils import check_embeddings_close, check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -56,6 +56,9 @@ def test_models(
     model,
     monkeypatch,
 ) -> None:
+    if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model, max_transformers_version="4.53.2")
+
     if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
         # ROCm Triton FA does not currently support sliding window attention
         # switch to use ROCm CK FA backend
diff --git a/tests/models/language/pooling/test_gte.py b/tests/models/language/pooling/test_gte.py
index 6d2eff709961..48a0cd64fec1 100644
--- a/tests/models/language/pooling/test_gte.py
+++ b/tests/models/language/pooling/test_gte.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from ...utils import check_transformers_version
 from .embed_utils import EmbedModelInfo, correctness_test_embed_models
 from .mteb_utils import mteb_test_embed_models
 
@@ -60,6 +61,10 @@
 @pytest.mark.parametrize("model_info", MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                            model_info: EmbedModelInfo) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
@@ -72,6 +77,10 @@
 def test_embed_models_correctness(hf_runner, vllm_runner,
                                   model_info: EmbedModelInfo,
                                   example_prompts) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
diff --git a/tests/models/language/pooling/test_reward.py b/tests/models/language/pooling/test_reward.py
index 7add1d975c63..beafa0aed986 100644
--- a/tests/models/language/pooling/test_reward.py
+++ b/tests/models/language/pooling/test_reward.py
@@ -10,6 +10,7 @@
 from vllm.platforms import current_platform
 
 from ....conftest import HfRunner
+from ...utils import check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -86,6 +87,9 @@ def test_prm_models(
     dtype: str,
     monkeypatch,
 ) -> None:
+    check_transformers_version("Qwen/Qwen2.5-Math-PRM-7B",
+                               max_transformers_version="4.53.2")
+
     if current_platform.is_cpu() and os.environ.get("VLLM_USE_V1", "0") == "0":
         pytest.skip("CPU only supports V1")
 
diff --git a/tests/models/utils.py b/tests/models/utils.py
index 1513db52209e..4657df60b169 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -412,3 +412,14 @@ def dummy_hf_overrides(
     })
 
     return hf_config
+
+
+def check_transformers_version(model: str,
+                               min_transformers_version: Optional[str] = None,
+                               max_transformers_version: Optional[str] = None):
+    from .registry import _HfExamplesInfo
+
+    return _HfExamplesInfo(model,
+                           min_transformers_version=min_transformers_version,
+                           max_transformers_version=max_transformers_version
+                           ).check_transformers_version(on_fail="skip")
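
For reference, the new check_transformers_version helper in tests/models/utils.py delegates to _HfExamplesInfo.check_transformers_version(on_fail="skip"), so a test pinned to a model that needs an older transformers release is skipped rather than failed. Below is a minimal standalone sketch of that gating behaviour, assuming a pytest context; the helper name skip_if_transformers_too_new and the use of packaging are illustrative, not part of this diff:

    # Illustrative sketch only: mirrors the skip-on-version-mismatch behaviour
    # of check_transformers_version, without going through _HfExamplesInfo.
    import pytest
    import transformers
    from packaging.version import Version


    def skip_if_transformers_too_new(model: str, max_version: str) -> None:
        """Skip the current test when installed transformers exceeds max_version."""
        installed = Version(transformers.__version__)
        if installed > Version(max_version):
            pytest.skip(f"{model} requires transformers<={max_version}, "
                        f"but {installed} is installed")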