Apply fixes from vllm-project#16076

DarkLight1337 · DarkLight1337 · commit d76eac1ac213 · 2025-04-07T08:57:23.000Z
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
diff --git a/examples/offline_inference/audio_language.py b/examples/offline_inference/audio_language.py
@@ -47,7 +47,7 @@ def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:
         model=model_name,
         trust_remote_code=True,
         max_model_len=4096,
-        max_num_seqs=5,
+        max_num_seqs=2,
         limit_mm_per_prompt={"audio": audio_count},
     )
 
diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py
@@ -26,7 +26,10 @@ def server():
         "--trust-remote-code",
     ]
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME,
+                            args,
+                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":
+                                      "30"}) as remote_server:
         yield remote_server
 
 
diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py
@@ -160,17 +160,32 @@
     ),
     "aya_vision": VLMTestInfo(
         models=["CohereForAI/aya-vision-8b"],
-        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        test_type=(VLMTestType.IMAGE),
         prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
         single_image_prompts=IMAGE_ASSETS.prompts({
             "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
             "cherry_blossom": "<image>What is the season?",  # noqa: E501
         }),
         multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
-        max_model_len=8192,
+        max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
-        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}
+        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
+    ),
+    "aya_vision-multi_image": VLMTestInfo(
+        models=["CohereForAI/aya-vision-8b"],
+        test_type=(VLMTestType.MULTI_IMAGE),
+        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts({
+            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+            "cherry_blossom": "<image>What is the season?",  # noqa: E501
+        }),
+        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForImageTextToText,
+        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
+        marks=[large_gpu_mark(min_gb=32)],
     ),
     "blip2": VLMTestInfo(
         # TODO: Change back to 2.7b once head_dim = 80 is supported
diff --git a/tests/models/decoder_only/vision_language/test_phi3v.py b/tests/models/decoder_only/vision_language/test_phi3v.py
@@ -5,7 +5,9 @@
 from typing import Optional
 
 import pytest
+from packaging.version import Version
 from transformers import AutoTokenizer
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.multimodal.image import rescale_image_size
 from vllm.platforms import current_platform
@@ -81,6 +83,13 @@ def run_test(
     from transformers import AutoImageProcessor  # noqa: F401
     from transformers import AutoProcessor  # noqa: F401
 
+    # Once the model repo is updated to 4.49, we should be able to run the
+    # test in `test_models.py` without the above workaround
+    if Version(TRANSFORMERS_VERSION) >= Version("4.49"):
+        pytest.skip(f"`transformers=={TRANSFORMERS_VERSION}` installed, "
+                    "but `transformers<=4.49` is required to run this model. "
+                    "Reason: Cannot run HF implementation")
+
     # NOTE: take care of the order. run vLLM first, and then run HF.
     # vLLM needs a fresh new process without cuda initialization.
     # if we run HF first, the cuda initialization will be done and it
diff --git a/tests/models/decoder_only/vision_language/test_pixtral.py b/tests/models/decoder_only/vision_language/test_pixtral.py
@@ -176,6 +176,8 @@ def test_chat(
             model,
             dtype=dtype,
             tokenizer_mode="mistral",
+            load_format="mistral",
+            config_format="mistral",
             max_model_len=max_model_len,
             limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:
diff --git a/tests/models/registry.py b/tests/models/registry.py
@@ -277,7 +277,9 @@ def check_available_online(
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}),  # noqa: E501
+                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
+                                      max_transformers_version="4.48",  # noqa: E501
+                                      transformers_version_reason="HF model is not compatible."),  # noqa: E501
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
                                          extras={"2B": "OpenGVLab/InternVL2-2B"},  # noqa: E501
                                          trust_remote_code=True),

Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:`
`47`	`47`	`model=model_name,`
`48`	`48`	`trust_remote_code=True,`
`49`	`49`	`max_model_len=4096,`
`50`		`- max_num_seqs=5,`
	`50`	`+ max_num_seqs=2,`
`51`	`51`	`limit_mm_per_prompt={"audio": audio_count},`
`52`	`52`	`)`
`53`	`53`
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,10 @@ def server():`
`26`	`26`	`"--trust-remote-code",`
`27`	`27`	`]`
`28`	`28`
`29`		`- with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:`
	`29`	`+ with RemoteOpenAIServer(MODEL_NAME,`
	`30`	`+ args,`
	`31`	`+ env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":`
	`32`	`+ "30"}) as remote_server:`
`30`	`33`	`yield remote_server`
`31`	`34`
`32`	`35`