From 3cc7f37c0a5d0c7ff65efac4411aea57efba7ee0 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 26 Feb 2025 16:33:58 +0100 Subject: [PATCH 01/12] Upgrade `transformers` to `v4.49.0` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/source/models/supported_models.md | 4 --- requirements-common.txt | 2 +- requirements-test.in | 2 +- requirements-test.txt | 2 +- .../vision_language/test_models.py | 36 +++++++++---------- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md index 9959f7233e86..5a11cdc1a023 100644 --- a/docs/source/models/supported_models.md +++ b/docs/source/models/supported_models.md @@ -898,10 +898,6 @@ Currently the PaliGemma model series is implemented without PrefixLM attention m `mistral-community/pixtral-12b` does not support V1 yet. ::: -:::{note} -To use Qwen2.5-VL series models, you have to install Huggingface `transformers` library from source via `pip install git+https://github.com/huggingface/transformers`. -::: - ### Pooling Models See [this page](pooling-models) for more information on how to use pooling models. diff --git a/requirements-common.txt b/requirements-common.txt index 942c3e039eaf..75d451eb5be5 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -6,7 +6,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.48.2 # Required for Bamba model and Transformers backend. +transformers >= 4.49.0 tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. diff --git a/requirements-test.in b/requirements-test.in index 53c531360d87..ac4cc64eb3c6 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -28,7 +28,7 @@ matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.0 # required for pixtral test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test -transformers==4.48.2 +transformers==4.49.0 # quantization bitsandbytes>=0.45.0 buildkite-test-collector==0.1.9 diff --git a/requirements-test.txt b/requirements-test.txt index 11f0e10969a6..19b5776b60de 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -628,7 +628,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.48.2 +transformers==4.49.0 # via # -r requirements-test.in # genai-perf diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py index 2c66edb539dc..8484576e533e 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/decoder_only/vision_language/test_models.py @@ -121,25 +121,6 @@ else ("half", "float")), marks=[pytest.mark.core_model], ), - # TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL - # once we upgraded to transformers>=4.49.0. 
- "qwen2_vl": VLMTestInfo( - models=["Qwen/Qwen2-VL-2B-Instruct"], - test_type=( - VLMTestType.IMAGE, - VLMTestType.MULTI_IMAGE, - VLMTestType.VIDEO - ), - prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501 - max_model_len=4096, - max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, - vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, - image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], - marks=[pytest.mark.core_model, pytest.mark.cpu_model], - ), "qwen2_5_vl": VLMTestInfo( models=["Qwen/Qwen2.5-VL-3B-Instruct"], test_type=( @@ -442,6 +423,23 @@ vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output, prompt_path_encoder=model_utils.qwen_prompt_path_encoder, ), + "qwen2_vl": VLMTestInfo( + models=["Qwen/Qwen2-VL-2B-Instruct"], + test_type=( + VLMTestType.IMAGE, + VLMTestType.MULTI_IMAGE, + VLMTestType.VIDEO + ), + prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 + video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501 + max_model_len=4096, + max_num_seqs=2, + auto_cls=AutoModelForVision2Seq, + vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, + image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + marks=[pytest.mark.cpu_model], + ), ### Tensor parallel / multi-gpu broadcast tests "chameleon-broadcast": VLMTestInfo( models=["facebook/chameleon-7b"], From 31c8275531b446991664d5bc135167338b5ea988 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 26 Feb 2025 18:08:30 +0100 Subject: [PATCH 02/12] Ensure `cv2` import is lazy Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/models/glm4v.py | 4 +++- vllm/model_executor/models/molmo.py | 4 +++- vllm/model_executor/models/qwen_vl.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py index 48543c5642ea..aa3bc5eecd7b 100644 --- a/vllm/model_executor/models/glm4v.py +++ b/vllm/model_executor/models/glm4v.py @@ -12,7 +12,6 @@ from torchvision import transforms from torchvision.transforms import InterpolationMode from transformers import PreTrainedTokenizer, TensorType -from transformers.image_utils import ImageInput from transformers.tokenization_utils_base import TextInput from vllm.attention.layer import MultiHeadAttention @@ -355,6 +354,9 @@ class GLM4VProcessor: This model doesn't define its own HF processor, so we implement our own one here. """ + # lazy import ImageInput because huggingface/transformers#34275 adds a + # non-lazy cv2 import. 
TODO: move this back if this is fixed + from transformers.image_utils import ImageInput def __init__( self, diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index cc4d38d8740b..12b1e321f7f6 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -13,7 +13,6 @@ from einops import rearrange from transformers import (BatchFeature, PretrainedConfig, ProcessorMixin, TensorType) -from transformers.image_utils import ImageInput from transformers.tokenization_utils_base import TextInput from vllm.attention import Attention @@ -978,6 +977,9 @@ class MolmoProcessorWrapper: The original definition can be found here: https://huggingface.co/allenai/Molmo-7B-D-0924/blob/main/preprocessing_molmo.py """ + # lazy import ImageInput because huggingface/transformers#34275 adds a + # non-lazy cv2 import. TODO: move this back if this is fixed + from transformers.image_utils import ImageInput def __init__(self, processor: ProcessorMixin): super().__init__() diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py index e0d8bf2fa3d2..980c3a869ece 100644 --- a/vllm/model_executor/models/qwen_vl.py +++ b/vllm/model_executor/models/qwen_vl.py @@ -19,7 +19,6 @@ from torchvision.transforms import InterpolationMode from transformers import (BatchFeature, PretrainedConfig, PreTrainedTokenizer, TensorType) -from transformers.image_utils import ImageInput from transformers.tokenization_utils_base import TextInput from vllm.config import VllmConfig @@ -440,6 +439,9 @@ class QwenVLProcessor: The image processor is defined here: https://huggingface.co/Qwen/Qwen-VL/blob/main/visual.py#L354 """ + # lazy import ImageInput because huggingface/transformers#34275 adds a + # non-lazy cv2 import. 
TODO: move this back if this is fixed
+    from transformers.image_utils import ImageInput
 
     def __init__(
         self,

From d41d81df2af1c7f904983772705812ef93a17a7e Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Thu, 27 Feb 2025 12:14:52 +0100
Subject: [PATCH 03/12] Update LMFE

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 requirements-common.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-common.txt b/requirements-common.txt
index 75d451eb5be5..1e4873eb4c88 100644
--- a/requirements-common.txt
+++ b/requirements-common.txt
@@ -17,7 +17,7 @@ prometheus_client >= 0.18.0
 pillow # Required for image processing
 prometheus-fastapi-instrumentator >= 7.0.0
 tiktoken >= 0.6.0 # Required for DBRX tokenizer
-lm-format-enforcer >= 0.10.9, < 0.11
+lm-format-enforcer >= 0.10.11, < 0.11
 outlines == 0.1.11
 lark == 1.2.2
 xgrammar == 0.1.11; platform_machine == "x86_64"

From 4eee5c90d15c7e7b466a7595285ad9b139b1e48f Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Thu, 27 Feb 2025 12:44:50 +0100
Subject: [PATCH 04/12] Revert `ImageInput` lazy import because it doesn't work

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/model_executor/models/glm4v.py   | 4 +---
 vllm/model_executor/models/molmo.py   | 4 +---
 vllm/model_executor/models/qwen_vl.py | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py
index aa3bc5eecd7b..48543c5642ea 100644
--- a/vllm/model_executor/models/glm4v.py
+++ b/vllm/model_executor/models/glm4v.py
@@ -12,6 +12,7 @@
 from torchvision import transforms
 from torchvision.transforms import InterpolationMode
 from transformers import PreTrainedTokenizer, TensorType
+from transformers.image_utils import ImageInput
 from transformers.tokenization_utils_base import TextInput
 
 from vllm.attention.layer import MultiHeadAttention
@@ -354,9 +355,6 @@ class GLM4VProcessor:
     This model doesn't define its own HF processor, so we implement our own
     one here.
     """
-    # lazy import ImageInput because huggingface/transformers#34275 adds a
-    # non-lazy cv2 import. TODO: move this back if this is fixed
-    from transformers.image_utils import ImageInput
 
     def __init__(
         self,
diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index 12b1e321f7f6..cc4d38d8740b 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -13,6 +13,7 @@
 from einops import rearrange
 from transformers import (BatchFeature, PretrainedConfig, ProcessorMixin,
                           TensorType)
+from transformers.image_utils import ImageInput
 from transformers.tokenization_utils_base import TextInput
 
 from vllm.attention import Attention
@@ -978,9 +977,6 @@ class MolmoProcessorWrapper:
    The original definition can be found here:
    https://huggingface.co/allenai/Molmo-7B-D-0924/blob/main/preprocessing_molmo.py
    """
-    # lazy import ImageInput because huggingface/transformers#34275 adds a
-    # non-lazy cv2 import.
TODO: move this back if this is fixed - from transformers.image_utils import ImageInput def __init__(self, processor: ProcessorMixin): super().__init__() diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py index 980c3a869ece..e0d8bf2fa3d2 100644 --- a/vllm/model_executor/models/qwen_vl.py +++ b/vllm/model_executor/models/qwen_vl.py @@ -19,6 +19,7 @@ from torchvision.transforms import InterpolationMode from transformers import (BatchFeature, PretrainedConfig, PreTrainedTokenizer, TensorType) +from transformers.image_utils import ImageInput from transformers.tokenization_utils_base import TextInput from vllm.config import VllmConfig @@ -439,9 +440,6 @@ class QwenVLProcessor: The image processor is defined here: https://huggingface.co/Qwen/Qwen-VL/blob/main/visual.py#L354 """ - # lazy import ImageInput because huggingface/transformers#34275 adds a - # non-lazy cv2 import. TODO: move this back if this is fixed - from transformers.image_utils import ImageInput def __init__( self, From 7812ca111e3bb8afaec09ce913765511a6cb2c1e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 21 Mar 2025 14:43:09 +0100 Subject: [PATCH 05/12] Update pin to 4.50.0 Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/common.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 328720517ae9..dbd59803650e 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -5,7 +5,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.49.0 +transformers >= 4.50.0 tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. 
diff --git a/requirements/test.in b/requirements/test.in index 38887bf45b66..a45223f1d62f 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -28,7 +28,7 @@ matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.0 # required for pixtral test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test -transformers==4.49.0 +transformers==4.50.0 # quantization bitsandbytes>=0.45.0 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index c6537072967c..bfcf63402777 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -628,7 +628,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.49.0 +transformers==4.50.0 # via # -r requirements/test.in # genai-perf From 4c0d7f4807ec10ea6f78435c8ee5e58af3b8ab82 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 25 Mar 2025 11:55:53 +0100 Subject: [PATCH 06/12] Uncomment pipeline parallel test Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/source/models/supported_models.md | 2 +- tests/distributed/test_pipeline_parallel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md index 56ea8c5d8372..79d2e7ad2b8b 100644 --- a/docs/source/models/supported_models.md +++ b/docs/source/models/supported_models.md @@ -73,7 +73,7 @@ The Transformers fallback explicitly supports the following features: - (except GGUF) - -- (requires `transformers>=4.49.0`) +- #### Remote code diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index e757db45c8cf..bd7b37763f41 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -245,7 +245,7 @@ def iter_params(self, model_id: str): # [LANGUAGE GENERATION] "microsoft/Phi-3.5-MoE-instruct", "meta-llama/Llama-3.2-1B-Instruct", - # "ArthurZ/Ilama-3.2-1B", NOTE: Uncomment after #13905 + "ArthurZ/Ilama-3.2-1B", "ibm/PowerLM-3b", # [LANGUAGE EMBEDDING] "intfloat/e5-mistral-7b-instruct", From 3df5bd687dbfd2f18af5b05625e8241912bb4103 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 25 Mar 2025 18:15:22 +0100 Subject: [PATCH 07/12] Bump patch version Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/common.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index a95b5d130184..bbbee7d498c6 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -6,7 +6,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.50.0 +transformers >= 4.50.1 tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. 
diff --git a/requirements/test.in b/requirements/test.in index 86be52e2addd..2fb750de7e00 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.4 # required for pixtral test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test -transformers==4.50.0 +transformers==4.50.1 # quantization bitsandbytes>=0.45.3 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index c21b49e9f0f7..a484ed70e9f4 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -641,7 +641,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.50.0 +transformers==4.50.1 # via # -r requirements/test.in # genai-perf From da3666eb36cbe739122a4d5a3a9b2eddb09ad3ed Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 26 Mar 2025 19:39:57 +0100 Subject: [PATCH 08/12] Add `max_transformers_version` to test example info Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/registry.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 5c84e85aaa90..459a2993f9c7 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -34,6 +34,11 @@ class _HfExamplesInfo: The minimum version of HF Transformers that is required to run this model. """ + max_transformers_version: Optional[str] = None + """ + The maximum version of HF Transformers that this model runs on. + """ + is_available_online: bool = True """ Set this to ``False`` if the name of this architecture no longer exists on @@ -57,21 +62,30 @@ def check_transformers_version( If the installed transformers version does not meet the requirements, perform the given action. 
""" - if self.min_transformers_version is None: + if (self.min_transformers_version is None + and self.max_transformers_version is None): return current_version = TRANSFORMERS_VERSION - required_version = self.min_transformers_version - if Version(current_version) < Version(required_version): + min_version = self.min_transformers_version + max_version = self.max_transformers_version + if min_version and Version(current_version) < Version(min_version): + msg = ( + f"You have `transformers=={current_version}` installed, but " + f"`transformers>={min_version}` is required to run this " + "model") + elif max_version and Version(current_version) > Version(max_version): msg = ( f"You have `transformers=={current_version}` installed, but " - f"`transformers>={required_version}` is required to run this " + f"`transformers<={max_version}` is required to run this " "model") + else: + return - if on_fail == "error": - raise RuntimeError(msg) - else: - pytest.skip(msg) + if on_fail == "error": + raise RuntimeError(msg) + else: + pytest.skip(msg) def check_available_online( self, @@ -268,11 +282,13 @@ def check_available_online( "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3", # noqa: E501 hf_overrides={"architectures": ["MantisForConditionalGeneration"]}), # noqa: E501 "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", + max_transformers_version="4.48", trust_remote_code=True), "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5", extras={"2.6": "openbmb/MiniCPM-V-2_6"}, # noqa: E501 trust_remote_code=True), "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924", + max_transformers_version="4.48", extras={"olmo": "allenai/Molmo-7B-O-0924"}, # noqa: E501 trust_remote_code=True), "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B", From 821bdd86ea2a1a9a62e07da36e4a5599935d3c09 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 26 Mar 2025 23:45:39 +0100 Subject: [PATCH 09/12] Llava onevision workaround Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/models/decoder_only/vision_language/test_models.py | 2 ++ .../decoder_only/vision_language/vlm_utils/model_utils.py | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py index 4caae8a46dab..1eda6d451365 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/decoder_only/vision_language/test_models.py @@ -317,6 +317,7 @@ prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 num_video_frames=16, max_model_len=16384, + hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 auto_cls=AutoModelForVision2Seq, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[CustomTestOptions( @@ -523,6 +524,7 @@ max_model_len=16384, max_num_seqs=2, auto_cls=AutoModelForVision2Seq, + hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[CustomTestOptions( inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs( diff --git a/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py b/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py index c84bf6dc15f4..aa19523e6516 
100644 --- a/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py +++ b/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py @@ -104,6 +104,13 @@ def _llava_vllm_to_hf_output(vllm_output: RunnerOutput, model: str, return hf_output_ids, hf_output_str, out_logprobs +def llava_onevision_hf_model_kwargs(model: str) -> dict: + """Workaround to fix the sliding window issue in llava_onevision.""" + config = AutoConfig.from_pretrained(model) + config.text_config.sliding_window = None + return config.to_dict() + + def llava_onevision_vllm_to_hf_output(vllm_output: RunnerOutput, model: str) -> RunnerOutput: """Sanitize vllm output [llava-onevision] to compare with hf output.""" From 495cdb67a6f5dd3fa19c9f1e9384c66f515a8c9f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:06:23 +0100 Subject: [PATCH 10/12] Update to next transformers pin Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/common.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index bbbee7d498c6..ed6106179ae0 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -6,7 +6,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.50.1 +transformers >= 4.50.2 tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. diff --git a/requirements/test.in b/requirements/test.in index 2fb750de7e00..8c8ce6e63692 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.4 # required for pixtral test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test -transformers==4.50.1 +transformers==4.50.2 # quantization bitsandbytes>=0.45.3 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index a484ed70e9f4..dc2354958d20 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -641,7 +641,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.50.1 +transformers==4.50.2 # via # -r requirements/test.in # genai-perf From 80feca073fbae4db5833bde5edb34abdbfe24a5e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:35:04 +0100 Subject: [PATCH 11/12] Add `transformers_version_reason` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../vision_language/test_models.py | 14 ---------- tests/models/registry.py | 26 +++++++++++++------ 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py index 1eda6d451365..21ec1853b934 100644 --- a/tests/models/decoder_only/vision_language/test_models.py +++ b/tests/models/decoder_only/vision_language/test_models.py @@ -8,9 +8,7 @@ from pathlib import PosixPath import pytest -from packaging.version import Version from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq -from transformers import __version__ as TRANSFORMERS_VERSION from vllm.platforms import current_platform from vllm.utils import identity @@ -199,12 +197,6 @@ 
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output, stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501 image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)], - marks=[ - pytest.mark.skipif( - Version(TRANSFORMERS_VERSION) >= Version("4.48"), - reason="HF model is not compatible with transformers>=4.48", - ) - ], ), "fuyu": VLMTestInfo( models=["adept/fuyu-8b"], @@ -347,12 +339,6 @@ auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output, patch_hf_runner=model_utils.mantis_patch_hf_runner, - marks=[ - pytest.mark.skipif( - Version(TRANSFORMERS_VERSION) >= Version("4.48"), - reason="HF model is not compatible with transformers>=4.48", - ) - ], ), "minicpmv_25": VLMTestInfo( models=["openbmb/MiniCPM-Llama3-V-2_5"], diff --git a/tests/models/registry.py b/tests/models/registry.py index 459a2993f9c7..274e41a0f42f 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -39,6 +39,11 @@ class _HfExamplesInfo: The maximum version of HF Transformers that this model runs on. """ + transformers_version_reason: Optional[str] = None + """ + The reason for the minimum/maximum version requirement. + """ + is_available_online: bool = True """ Set this to ``False`` if the name of this architecture no longer exists on @@ -69,19 +74,17 @@ def check_transformers_version( current_version = TRANSFORMERS_VERSION min_version = self.min_transformers_version max_version = self.max_transformers_version + msg = f"`transformers=={current_version}` installed, but `transformers" if min_version and Version(current_version) < Version(min_version): - msg = ( - f"You have `transformers=={current_version}` installed, but " - f"`transformers>={min_version}` is required to run this " - "model") + msg += f">={min_version}` is required to run this model." elif max_version and Version(current_version) > Version(max_version): - msg = ( - f"You have `transformers=={current_version}` installed, but " - f"`transformers<={max_version}` is required to run this " - "model") + msg += f"<={max_version}` is required to run this model." 
else: return + if self.transformers_version_reason: + msg += f" Reason: {self.transformers_version_reason}" + if on_fail == "error": raise RuntimeError(msg) else: @@ -259,6 +262,9 @@ def check_available_online( "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"), # noqa: E501 "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"), # noqa: E501 "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny", # noqa: E501 + extras={"fork": "Isotr0py/deepseek-vl2-tiny"}, # noqa: E501 + max_transformers_version="4.48", # noqa: E501 + transformers_version_reason="HF model is not compatible.", # noqa: E501 hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501 "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"), "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it", @@ -280,15 +286,19 @@ def check_available_online( "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"), # noqa: E501 "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3", # noqa: E501 + max_transformers_version="4.48", # noqa: E501 + transformers_version_reason="HF model is not compatible.", # noqa: E501 hf_overrides={"architectures": ["MantisForConditionalGeneration"]}), # noqa: E501 "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6", max_transformers_version="4.48", + transformers_version_reason="Use of deprecated imports which have been removed.", # noqa: E501 trust_remote_code=True), "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5", extras={"2.6": "openbmb/MiniCPM-V-2_6"}, # noqa: E501 trust_remote_code=True), "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924", max_transformers_version="4.48", + transformers_version_reason="Use of private method which no longer exists.", # noqa: E501 extras={"olmo": "allenai/Molmo-7B-O-0924"}, # noqa: E501 trust_remote_code=True), "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B", From 9ef1e0b3158f8ed0dbe7321370f922a4db8ba4ff Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:40:02 +0200 Subject: [PATCH 12/12] Bump transformers pin Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- requirements/common.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 3c30ef503072..c7bbdb71b742 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -6,7 +6,7 @@ requests >= 2.26.0 tqdm blake3 py-cpuinfo -transformers >= 4.50.2 +transformers >= 4.50.3 tokenizers >= 0.19.1 # Required for Llama 3. protobuf # Required by LlamaTokenizer. fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint. 
diff --git a/requirements/test.in b/requirements/test.in index 8d5ad12c7cdc..cf89794b93fc 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -30,7 +30,7 @@ matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.4 # required for pixtral test datamodel_code_generator # required for minicpm3 test lm-eval[api]==0.4.4 # required for model evaluation test -transformers==4.50.2 +transformers==4.50.3 # quantization bitsandbytes>=0.45.3 buildkite-test-collector==0.1.9 diff --git a/requirements/test.txt b/requirements/test.txt index d09c9d6dfba2..26ed9dbe32cb 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -643,7 +643,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.50.2 +transformers==4.50.3 # via # -r requirements/test.in # genai-perf
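
Two standalone sketches of the mechanisms introduced in this series follow.

First, the version gate added to tests/models/registry.py by PATCH 08 and
refined by PATCH 11. This is a condensed illustration, not the vLLM code:
`ExampleInfo` and `check` are made-up names, and the gate message is returned
rather than raised as a RuntimeError or passed to `pytest.skip`.

from dataclasses import dataclass
from typing import Optional

from packaging.version import Version


@dataclass
class ExampleInfo:
    # Mirrors the fields added to `_HfExamplesInfo` in PATCHes 08 and 11.
    min_transformers_version: Optional[str] = None
    max_transformers_version: Optional[str] = None
    transformers_version_reason: Optional[str] = None


def check(installed: str, info: ExampleInfo) -> Optional[str]:
    """Return the gate message, or None if `installed` is acceptable."""
    msg = f"`transformers=={installed}` installed, but `transformers"
    if (info.min_transformers_version is not None
            and Version(installed) < Version(info.min_transformers_version)):
        msg += f">={info.min_transformers_version}` is required to run this model."
    elif (info.max_transformers_version is not None
          and Version(installed) > Version(info.max_transformers_version)):
        msg += f"<={info.max_transformers_version}` is required to run this model."
    else:
        return None
    if info.transformers_version_reason is not None:
        msg += f" Reason: {info.transformers_version_reason}"
    return msg


# MolmoForCausalLM is capped at 4.48 in the registry, so once this series
# bumps the pin to 4.50.3 its tests are skipped with the reason attached:
molmo = ExampleInfo(
    max_transformers_version="4.48",
    transformers_version_reason="Use of private method which no longer exists.")
assert check("4.50.3", molmo) is not None  # gated off, reason included
assert check("4.48", molmo) is None        # still allowed to run

Second, why PATCH 04 reverts the "lazy" import attempted in PATCH 02: a
`from ... import` statement placed in a class body still executes when the
class statement is evaluated, i.e. at module import time, so it cannot avoid
the eager cv2 import described in huggingface/transformers#34275. A genuinely
deferred import would look like the sketch below (hypothetical placement and
function name; the series instead restores the module-level import):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Type-checker-only import; never executed at runtime.
    from transformers.image_utils import ImageInput


def preprocess(images: "ImageInput") -> None:
    # Deferred until the first call actually needs the symbol, at which
    # point transformers (and transitively cv2) is finally imported.
    from transformers.image_utils import ImageInput  # noqa: F401
    ...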