
Commit d76eac1

Apply fixes from vllm-project#16076
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Parent: ffff11f

6 files changed: 37 additions & 6 deletions

examples/offline_inference/audio_language.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def run_minicpmo(question: str, audio_count: int) -> ModelRequestData:
         model=model_name,
         trust_remote_code=True,
         max_model_len=4096,
-        max_num_seqs=5,
+        max_num_seqs=2,
         limit_mm_per_prompt={"audio": audio_count},
     )
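
Lowering max_num_seqs caps how many sequences the engine schedules concurrently, which trims peak memory for the MiniCPM-o audio example. A minimal offline-inference sketch with the updated engine arguments (the model name, prompt, and generate call are illustrative; the real script builds a model-specific chat prompt and attaches the audio input):

from vllm import LLM, SamplingParams

# Sketch only: mirrors the engine arguments from run_minicpmo after this change.
# model_name is assumed to point at the MiniCPM-o checkpoint used by the example.
model_name = "openbmb/MiniCPM-o-2_6"

llm = LLM(
    model=model_name,
    trust_remote_code=True,
    max_model_len=4096,
    max_num_seqs=2,  # lowered from 5: fewer in-flight sequences, lower peak memory
    limit_mm_per_prompt={"audio": 1},  # one audio item per prompt in this sketch
)

# Placeholder text-only call; the example script passes the audio through a
# multi-modal prompt dict instead.
outputs = llm.generate("Describe the audio.", SamplingParams(max_tokens=64))
print(outputs[0].outputs[0].text)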

tests/entrypoints/openai/test_audio.py

Lines changed: 4 additions & 1 deletion
@@ -26,7 +26,10 @@ def server():
         "--trust-remote-code",
     ]
 
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+    with RemoteOpenAIServer(MODEL_NAME,
+                            args,
+                            env_dict={"VLLM_AUDIO_FETCH_TIMEOUT":
+                                      "30"}) as remote_server:
         yield remote_server
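
Passing env_dict applies the higher VLLM_AUDIO_FETCH_TIMEOUT only to the spawned API server process rather than to the whole pytest run. A standard-library sketch of that pattern, assuming RemoteOpenAIServer forwards env_dict to the server subprocess (this is not vLLM's actual implementation):

import os
import subprocess

def launch_with_env(cmd: list[str], env_dict: dict[str, str] | None = None) -> subprocess.Popen:
    # Hypothetical helper illustrating the likely mechanism behind env_dict:
    # overlay per-process environment overrides on top of the current env.
    env = os.environ.copy()
    if env_dict:
        env.update(env_dict)  # e.g. {"VLLM_AUDIO_FETCH_TIMEOUT": "30"}
    return subprocess.Popen(cmd, env=env)

# The override is visible only inside the child process; the test runner's
# own environment stays untouched.
proc = launch_with_env(
    ["python", "-c", "import os; print(os.environ['VLLM_AUDIO_FETCH_TIMEOUT'])"],
    env_dict={"VLLM_AUDIO_FETCH_TIMEOUT": "30"},
)
proc.wait()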

tests/models/decoder_only/vision_language/test_models.py

Lines changed: 18 additions & 3 deletions
@@ -160,17 +160,32 @@
     ),
     "aya_vision": VLMTestInfo(
         models=["CohereForAI/aya-vision-8b"],
-        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        test_type=(VLMTestType.IMAGE),
         prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
         single_image_prompts=IMAGE_ASSETS.prompts({
             "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
             "cherry_blossom": "<image>What is the season?",  # noqa: E501
         }),
         multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
-        max_model_len=8192,
+        max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
-        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}
+        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
+    ),
+    "aya_vision-multi_image": VLMTestInfo(
+        models=["CohereForAI/aya-vision-8b"],
+        test_type=(VLMTestType.MULTI_IMAGE),
+        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
+        single_image_prompts=IMAGE_ASSETS.prompts({
+            "stop_sign": "<image>What's the content in the center of the image?",  # noqa: E501
+            "cherry_blossom": "<image>What is the season?",  # noqa: E501
+        }),
+        multi_image_prompt="<image><image>Describe the two images in detail.",  # noqa: E501
+        max_model_len=4096,
+        max_num_seqs=2,
+        auto_cls=AutoModelForImageTextToText,
+        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
+        marks=[large_gpu_mark(min_gb=32)],
     ),
     "blip2": VLMTestInfo(
         # TODO: Change back to 2.7b once head_dim = 80 is supported
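
The single aya_vision entry is split so the image-only test keeps running in the default pool, while the new aya_vision-multi_image entry is gated behind large_gpu_mark(min_gb=32) and only runs on hosts with enough GPU memory. A rough stand-in for such a mark, built on pytest.mark.skipif and torch.cuda (vLLM's real large_gpu_mark helper may differ):

import pytest
import torch

def large_gpu_mark(min_gb: int) -> pytest.MarkDecorator:
    # Illustrative stand-in for the helper used in test_models.py: skip the
    # test unless a CUDA device with at least `min_gb` GiB of memory exists.
    if torch.cuda.is_available():
        total_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
    else:
        total_gb = 0.0
    return pytest.mark.skipif(
        total_gb < min_gb,
        reason=f"Need at least {min_gb} GiB of GPU memory, found {total_gb:.0f} GiB",
    )

@large_gpu_mark(min_gb=32)
def test_aya_vision_multi_image_sketch():
    ...  # the gated multi-image case would run here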

tests/models/decoder_only/vision_language/test_phi3v.py

Lines changed: 9 additions & 0 deletions
@@ -5,7 +5,9 @@
 from typing import Optional
 
 import pytest
+from packaging.version import Version
 from transformers import AutoTokenizer
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm.multimodal.image import rescale_image_size
 from vllm.platforms import current_platform
@@ -81,6 +83,13 @@ def run_test(
     from transformers import AutoImageProcessor  # noqa: F401
     from transformers import AutoProcessor  # noqa: F401
 
+    # Once the model repo is updated to 4.49, we should be able to run the
+    # test in `test_models.py` without the above workaround
+    if Version(TRANSFORMERS_VERSION) >= Version("4.49"):
+        pytest.skip(f"`transformers=={TRANSFORMERS_VERSION}` installed, "
+                    "but `transformers<=4.49` is required to run this model. "
+                    "Reason: Cannot run HF implementation")
+
     # NOTE: take care of the order. run vLLM first, and then run HF.
     # vLLM needs a fresh new process without cuda initialization.
     # if we run HF first, the cuda initialization will be done and it
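
The added guard skips the HF-reference half of the Phi-3-vision test when transformers 4.49 or newer is installed, since the model repo's remote code cannot run on it yet. The same version-gating pattern, extracted into a small self-contained sketch (the helper name here is made up for illustration):

import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION

def skip_if_transformers_at_least(first_bad_version: str, reason: str) -> None:
    # Sketch of the guard added to run_test: bail out of the current test when
    # the installed transformers release is too new for the HF reference path.
    if Version(TRANSFORMERS_VERSION) >= Version(first_bad_version):
        pytest.skip(f"transformers=={TRANSFORMERS_VERSION} installed, "
                    f"but <{first_bad_version} is required: {reason}")

def test_hf_comparison_sketch():
    skip_if_transformers_at_least("4.49", "Cannot run HF implementation")
    ...  # HF vs. vLLM comparison would follow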

tests/models/decoder_only/vision_language/test_pixtral.py

Lines changed: 2 additions & 0 deletions
@@ -176,6 +176,8 @@ def test_chat(
         model,
         dtype=dtype,
         tokenizer_mode="mistral",
+        load_format="mistral",
+        config_format="mistral",
         max_model_len=max_model_len,
         limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
     ) as vllm_model:
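
The two new options make the Pixtral chat test load the consolidated Mistral-format weights and Mistral-style config, to match the tokenizer_mode="mistral" it already used. A hedged offline sketch with all three options together (the checkpoint name and max_model_len below are assumptions, not taken from the test):

from vllm import LLM

# Sketch only: combines the three Mistral-format options now used in test_chat.
# The checkpoint below is an assumed example of a Mistral-format Pixtral model.
llm = LLM(
    model="mistralai/Pixtral-12B-2409",
    tokenizer_mode="mistral",   # use the mistral-common tokenizer
    load_format="mistral",      # load consolidated Mistral-format weights
    config_format="mistral",    # read the Mistral-style config rather than config.json
    max_model_len=8192,         # assumed value for the sketch
)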

tests/models/registry.py

Lines changed: 3 additions & 1 deletion
@@ -277,7 +277,9 @@ def check_available_online(
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"}),  # noqa: E501
+                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
+                                      max_transformers_version="4.48",  # noqa: E501
+                                      transformers_version_reason="HF model is not compatible."),  # noqa: E501
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
                                          extras={"2B": "OpenGVLab/InternVL2-2B"},  # noqa: E501
                                          trust_remote_code=True),
