4 changes: 0 additions & 4 deletions docs/source/models/supported_models.md
@@ -893,10 +893,6 @@ Currently the PaliGemma model series is implemented without PrefixLM attention m
 `mistral-community/pixtral-12b` does not support V1 yet.
 :::
 
-:::{note}
-To use Qwen2.5-VL series models, you have to install Huggingface `transformers` library from source via `pip install git+https://github.com/huggingface/transformers`.
-:::
-
 ### Pooling Models
 
 See [this page](pooling-models) for more information on how to use pooling models.
2 changes: 1 addition & 1 deletion requirements-common.txt
@@ -6,7 +6,7 @@ requests >= 2.26.0
 tqdm
 blake3
 py-cpuinfo
-transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
+transformers >= 4.49.0 # Required for Qwen2.5-VL
 tokenizers >= 0.19.1 # Required for Llama 3.
 protobuf # Required by LlamaTokenizer.
 fastapi[standard] >= 0.107.0, < 0.113.0; python_version < '3.9'
4 changes: 2 additions & 2 deletions requirements-test.in
@@ -28,7 +28,7 @@ matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.4 # required for model evaluation test
-transformers==4.48.2
+transformers==4.49.0
 # quantization
 bitsandbytes>=0.45.0
 buildkite-test-collector==0.1.9
@@ -38,4 +38,4 @@ tritonclient==2.51.0

 numpy < 2.0.0
 runai-model-streamer==0.11.0
-runai-model-streamer-s3==0.11.0
+runai-model-streamer-s3==0.11.0
2 changes: 1 addition & 1 deletion requirements-test.txt
@@ -628,7 +628,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.48.2
+transformers==4.49.0
     # via
     #   -r requirements-test.in
     #   genai-perf
36 changes: 17 additions & 19 deletions tests/models/decoder_only/vision_language/test_models.py
@@ -121,25 +121,6 @@
                 else ("half", "float")),
         marks=[pytest.mark.core_model],
     ),
-    # TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
-    # once we upgraded to transformers>=4.49.0.
-    "qwen2_vl": VLMTestInfo(
-        models=["Qwen/Qwen2-VL-2B-Instruct"],
-        test_type=(
-            VLMTestType.IMAGE,
-            VLMTestType.MULTI_IMAGE,
-            VLMTestType.VIDEO
-        ),
-        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
-        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
-        max_model_len=4096,
-        max_num_seqs=2,
-        auto_cls=AutoModelForVision2Seq,
-        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
-        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
-    ),
     "qwen2_5_vl": VLMTestInfo(
         models=["Qwen/Qwen2.5-VL-3B-Instruct"],
         test_type=(
@@ -442,6 +423,23 @@
         vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
         prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
     ),
+    "qwen2_vl": VLMTestInfo(
+        models=["Qwen/Qwen2-VL-2B-Instruct"],
+        test_type=(
+            VLMTestType.IMAGE,
+            VLMTestType.MULTI_IMAGE,
+            VLMTestType.VIDEO
+        ),
+        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
+        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501
+        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForVision2Seq,
+        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
+        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
+        marks=[pytest.mark.cpu_model],
+    ),
     ### Tensor parallel / multi-gpu broadcast tests
     "chameleon-broadcast": VLMTestInfo(
         models=["facebook/chameleon-7b"],