Skip to content

Commit bef180f

Browse files
[V0 Deprecation] Enable the remaining multimodal tests in V1 (#25307)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent d88918e commit bef180f

File tree

8 files changed

+195
-214
lines changed

8 files changed

+195
-214
lines changed

tests/conftest.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import tempfile
2020
import threading
2121
from collections.abc import Generator
22+
from contextlib import nullcontext
2223
from enum import Enum
2324
from typing import Any, Callable, Optional, TypedDict, TypeVar, Union, cast
2425

@@ -45,14 +46,14 @@
4546
from vllm.distributed import (cleanup_dist_env_and_memory,
4647
init_distributed_environment,
4748
initialize_model_parallel)
48-
from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
49-
to_enc_dec_tuple_list, zip_enc_dec_prompts)
49+
from vllm.inputs import TextPrompt
5050
from vllm.logger import init_logger
5151
from vllm.multimodal.utils import fetch_image
5252
from vllm.outputs import RequestOutput
5353
from vllm.sampling_params import BeamSearchParams
5454
from vllm.sequence import Logprob
5555
from vllm.transformers_utils.utils import maybe_model_redirect
56+
from vllm.utils import set_default_torch_num_threads
5657

5758
logger = init_logger(__name__)
5859

@@ -306,6 +307,35 @@ def __init__(
306307
is_cross_encoder: bool = False,
307308
skip_tokenizer_init: bool = False,
308309
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
310+
# Set this to avoid hanging issue
311+
default_torch_num_threads: Optional[int] = None,
312+
) -> None:
313+
init_ctx = (nullcontext() if default_torch_num_threads is None else
314+
set_default_torch_num_threads(default_torch_num_threads))
315+
316+
with init_ctx:
317+
self._init(
318+
model_name=model_name,
319+
dtype=dtype,
320+
model_kwargs=model_kwargs,
321+
trust_remote_code=trust_remote_code,
322+
is_sentence_transformer=is_sentence_transformer,
323+
is_cross_encoder=is_cross_encoder,
324+
skip_tokenizer_init=skip_tokenizer_init,
325+
auto_cls=auto_cls,
326+
)
327+
328+
def _init(
329+
self,
330+
model_name: str,
331+
dtype: str = "auto",
332+
*,
333+
model_kwargs: Optional[dict[str, Any]] = None,
334+
trust_remote_code: bool = True,
335+
is_sentence_transformer: bool = False,
336+
is_cross_encoder: bool = False,
337+
skip_tokenizer_init: bool = False,
338+
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
309339
) -> None:
310340
model_name = maybe_model_redirect(model_name)
311341
self.model_name = model_name
@@ -714,26 +744,32 @@ def __init__(
714744
enable_chunked_prefill: Optional[bool] = False,
715745
swap_space: int = 4,
716746
enforce_eager: Optional[bool] = False,
747+
# Set this to avoid hanging issue
748+
default_torch_num_threads: Optional[int] = None,
717749
**kwargs,
718750
) -> None:
719-
self.llm = LLM(
720-
model=model_name,
721-
runner=runner,
722-
convert=convert,
723-
tokenizer=tokenizer_name,
724-
tokenizer_mode=tokenizer_mode,
725-
trust_remote_code=trust_remote_code,
726-
dtype=dtype,
727-
seed=seed,
728-
swap_space=swap_space,
729-
enforce_eager=enforce_eager,
730-
disable_log_stats=disable_log_stats,
731-
tensor_parallel_size=tensor_parallel_size,
732-
max_model_len=max_model_len,
733-
block_size=block_size,
734-
enable_chunked_prefill=enable_chunked_prefill,
735-
**kwargs,
736-
)
751+
init_ctx = (nullcontext() if default_torch_num_threads is None else
752+
set_default_torch_num_threads(default_torch_num_threads))
753+
754+
with init_ctx:
755+
self.llm = LLM(
756+
model=model_name,
757+
runner=runner,
758+
convert=convert,
759+
tokenizer=tokenizer_name,
760+
tokenizer_mode=tokenizer_mode,
761+
trust_remote_code=trust_remote_code,
762+
dtype=dtype,
763+
seed=seed,
764+
swap_space=swap_space,
765+
enforce_eager=enforce_eager,
766+
disable_log_stats=disable_log_stats,
767+
tensor_parallel_size=tensor_parallel_size,
768+
max_model_len=max_model_len,
769+
block_size=block_size,
770+
enable_chunked_prefill=enable_chunked_prefill,
771+
**kwargs,
772+
)
737773

738774
def get_inputs(
739775
self,

tests/models/multimodal/generation/test_common.py

Lines changed: 79 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,6 @@
3232
if current_platform.is_rocm():
3333
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
3434

35-
REQUIRES_V0_MODELS = [
36-
# V1 Test: not enough KV cache space in C1.
37-
"fuyu",
38-
# V1 Test: Deadlock issue when processing mm_inputs
39-
"llava-onevision-transformers",
40-
]
41-
4235
# yapf: disable
4336
COMMON_BROADCAST_SETTINGS = {
4437
"test_type": VLMTestType.IMAGE,
@@ -186,8 +179,11 @@
186179
image_size_factors=[(0.25, 0.5, 1.0)],
187180
vllm_runner_kwargs={
188181
"model_impl": "transformers",
182+
"default_torch_num_threads": 1,
189183
},
190-
marks=[pytest.mark.core_model],
184+
# FIXME: Investigate why the test hangs
185+
# when processing the 3rd prompt in vLLM
186+
marks=[pytest.mark.core_model, pytest.mark.skip(reason="Test hangs")],
191187
),
192188
"idefics3-transformers": VLMTestInfo(
193189
models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
@@ -320,6 +316,7 @@
320316
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
321317
num_logprobs=10,
322318
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
319+
marks=[large_gpu_mark(min_gb=32)],
323320
),
324321
"gemma3": VLMTestInfo(
325322
models=["google/gemma-3-4b-it"],
@@ -861,13 +858,14 @@ def _mark_splits(
861858
test_type=VLMTestType.IMAGE,
862859
create_new_process_for_each_test=False,
863860
))
864-
def test_single_image_models(tmp_path: PosixPath, model_type: str,
865-
test_case: ExpandableVLMTestArgs,
866-
hf_runner: type[HfRunner],
867-
vllm_runner: type[VllmRunner],
868-
image_assets: ImageTestAssets, monkeypatch):
869-
if model_type in REQUIRES_V0_MODELS:
870-
monkeypatch.setenv("VLLM_USE_V1", "0")
861+
def test_single_image_models(
862+
tmp_path: PosixPath,
863+
model_type: str,
864+
test_case: ExpandableVLMTestArgs,
865+
hf_runner: type[HfRunner],
866+
vllm_runner: type[VllmRunner],
867+
image_assets: ImageTestAssets,
868+
):
871869
model_test_info = VLM_TEST_SETTINGS[model_type]
872870
runners.run_single_image_test(
873871
tmp_path=tmp_path,
@@ -886,13 +884,14 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
886884
test_type=VLMTestType.MULTI_IMAGE,
887885
create_new_process_for_each_test=False,
888886
))
889-
def test_multi_image_models(tmp_path: PosixPath, model_type: str,
890-
test_case: ExpandableVLMTestArgs,
891-
hf_runner: type[HfRunner],
892-
vllm_runner: type[VllmRunner],
893-
image_assets: ImageTestAssets, monkeypatch):
894-
if model_type in REQUIRES_V0_MODELS:
895-
monkeypatch.setenv("VLLM_USE_V1", "0")
887+
def test_multi_image_models(
888+
tmp_path: PosixPath,
889+
model_type: str,
890+
test_case: ExpandableVLMTestArgs,
891+
hf_runner: type[HfRunner],
892+
vllm_runner: type[VllmRunner],
893+
image_assets: ImageTestAssets,
894+
):
896895
model_test_info = VLM_TEST_SETTINGS[model_type]
897896
runners.run_multi_image_test(
898897
tmp_path=tmp_path,
@@ -911,13 +910,13 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
911910
test_type=VLMTestType.EMBEDDING,
912911
create_new_process_for_each_test=False,
913912
))
914-
def test_image_embedding_models(model_type: str,
915-
test_case: ExpandableVLMTestArgs,
916-
hf_runner: type[HfRunner],
917-
vllm_runner: type[VllmRunner],
918-
image_assets: ImageTestAssets, monkeypatch):
919-
if model_type in REQUIRES_V0_MODELS:
920-
monkeypatch.setenv("VLLM_USE_V1", "0")
913+
def test_image_embedding_models(
914+
model_type: str,
915+
test_case: ExpandableVLMTestArgs,
916+
hf_runner: type[HfRunner],
917+
vllm_runner: type[VllmRunner],
918+
image_assets: ImageTestAssets,
919+
):
921920
model_test_info = VLM_TEST_SETTINGS[model_type]
922921
runners.run_embedding_test(
923922
model_test_info=model_test_info,
@@ -935,11 +934,13 @@ def test_image_embedding_models(model_type: str,
935934
test_type=VLMTestType.VIDEO,
936935
create_new_process_for_each_test=False,
937936
))
938-
def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
939-
hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
940-
video_assets: VideoTestAssets, monkeypatch):
941-
if model_type in REQUIRES_V0_MODELS:
942-
monkeypatch.setenv("VLLM_USE_V1", "0")
937+
def test_video_models(
938+
model_type: str,
939+
test_case: ExpandableVLMTestArgs,
940+
hf_runner: type[HfRunner],
941+
vllm_runner: type[VllmRunner],
942+
video_assets: VideoTestAssets,
943+
):
943944
model_test_info = VLM_TEST_SETTINGS[model_type]
944945
runners.run_video_test(
945946
model_test_info=model_test_info,
@@ -957,11 +958,13 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
957958
test_type=VLMTestType.AUDIO,
958959
create_new_process_for_each_test=False,
959960
))
960-
def test_audio_models(model_type: str, test_case: ExpandableVLMTestArgs,
961-
hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
962-
audio_assets: AudioTestAssets, monkeypatch):
963-
if model_type in REQUIRES_V0_MODELS:
964-
monkeypatch.setenv("VLLM_USE_V1", "0")
961+
def test_audio_models(
962+
model_type: str,
963+
test_case: ExpandableVLMTestArgs,
964+
hf_runner: type[HfRunner],
965+
vllm_runner: type[VllmRunner],
966+
audio_assets: AudioTestAssets,
967+
):
965968
model_test_info = VLM_TEST_SETTINGS[model_type]
966969
runners.run_audio_test(
967970
model_test_info=model_test_info,
@@ -984,10 +987,7 @@ def test_custom_inputs_models(
984987
test_case: ExpandableVLMTestArgs,
985988
hf_runner: type[HfRunner],
986989
vllm_runner: type[VllmRunner],
987-
monkeypatch,
988990
):
989-
if model_type in REQUIRES_V0_MODELS:
990-
monkeypatch.setenv("VLLM_USE_V1", "0")
991991
model_test_info = VLM_TEST_SETTINGS[model_type]
992992
runners.run_custom_inputs_test(
993993
model_test_info=model_test_info,
@@ -1006,13 +1006,14 @@ def test_custom_inputs_models(
10061006
create_new_process_for_each_test=True,
10071007
))
10081008
@create_new_process_for_each_test()
1009-
def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
1010-
test_case: ExpandableVLMTestArgs,
1011-
hf_runner: type[HfRunner],
1012-
vllm_runner: type[VllmRunner],
1013-
image_assets: ImageTestAssets, monkeypatch):
1014-
if model_type in REQUIRES_V0_MODELS:
1015-
monkeypatch.setenv("VLLM_USE_V1", "0")
1009+
def test_single_image_models_heavy(
1010+
tmp_path: PosixPath,
1011+
model_type: str,
1012+
test_case: ExpandableVLMTestArgs,
1013+
hf_runner: type[HfRunner],
1014+
vllm_runner: type[VllmRunner],
1015+
image_assets: ImageTestAssets,
1016+
):
10161017
model_test_info = VLM_TEST_SETTINGS[model_type]
10171018
runners.run_single_image_test(
10181019
tmp_path=tmp_path,
@@ -1032,13 +1033,14 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
10321033
create_new_process_for_each_test=True,
10331034
))
10341035
@create_new_process_for_each_test()
1035-
def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
1036-
test_case: ExpandableVLMTestArgs,
1037-
hf_runner: type[HfRunner],
1038-
vllm_runner: type[VllmRunner],
1039-
image_assets: ImageTestAssets, monkeypatch):
1040-
if model_type in REQUIRES_V0_MODELS:
1041-
monkeypatch.setenv("VLLM_USE_V1", "0")
1036+
def test_multi_image_models_heavy(
1037+
tmp_path: PosixPath,
1038+
model_type: str,
1039+
test_case: ExpandableVLMTestArgs,
1040+
hf_runner: type[HfRunner],
1041+
vllm_runner: type[VllmRunner],
1042+
image_assets: ImageTestAssets,
1043+
):
10421044
model_test_info = VLM_TEST_SETTINGS[model_type]
10431045
runners.run_multi_image_test(
10441046
tmp_path=tmp_path,
@@ -1058,14 +1060,13 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
10581060
create_new_process_for_each_test=True,
10591061
))
10601062
@create_new_process_for_each_test()
1061-
def test_image_embedding_models_heavy(model_type: str,
1062-
test_case: ExpandableVLMTestArgs,
1063-
hf_runner: type[HfRunner],
1064-
vllm_runner: type[VllmRunner],
1065-
image_assets: ImageTestAssets,
1066-
monkeypatch):
1067-
if model_type in REQUIRES_V0_MODELS:
1068-
monkeypatch.setenv("VLLM_USE_V1", "0")
1063+
def test_image_embedding_models_heavy(
1064+
model_type: str,
1065+
test_case: ExpandableVLMTestArgs,
1066+
hf_runner: type[HfRunner],
1067+
vllm_runner: type[VllmRunner],
1068+
image_assets: ImageTestAssets,
1069+
):
10691070
model_test_info = VLM_TEST_SETTINGS[model_type]
10701071
runners.run_embedding_test(
10711072
model_test_info=model_test_info,
@@ -1083,12 +1084,13 @@ def test_image_embedding_models_heavy(model_type: str,
10831084
test_type=VLMTestType.VIDEO,
10841085
create_new_process_for_each_test=True,
10851086
))
1086-
def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
1087-
hf_runner: type[HfRunner],
1088-
vllm_runner: type[VllmRunner],
1089-
video_assets: VideoTestAssets, monkeypatch):
1090-
if model_type in REQUIRES_V0_MODELS:
1091-
monkeypatch.setenv("VLLM_USE_V1", "0")
1087+
def test_video_models_heavy(
1088+
model_type: str,
1089+
test_case: ExpandableVLMTestArgs,
1090+
hf_runner: type[HfRunner],
1091+
vllm_runner: type[VllmRunner],
1092+
video_assets: VideoTestAssets,
1093+
):
10921094
model_test_info = VLM_TEST_SETTINGS[model_type]
10931095
runners.run_video_test(
10941096
model_test_info=model_test_info,
@@ -1106,12 +1108,13 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
11061108
test_type=VLMTestType.AUDIO,
11071109
create_new_process_for_each_test=True,
11081110
))
1109-
def test_audio_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
1110-
hf_runner: type[HfRunner],
1111-
vllm_runner: type[VllmRunner],
1112-
audio_assets: AudioTestAssets, monkeypatch):
1113-
if model_type in REQUIRES_V0_MODELS:
1114-
monkeypatch.setenv("VLLM_USE_V1", "0")
1111+
def test_audio_models_heavy(
1112+
model_type: str,
1113+
test_case: ExpandableVLMTestArgs,
1114+
hf_runner: type[HfRunner],
1115+
vllm_runner: type[VllmRunner],
1116+
audio_assets: AudioTestAssets,
1117+
):
11151118
model_test_info = VLM_TEST_SETTINGS[model_type]
11161119
runners.run_audio_test(
11171120
model_test_info=model_test_info,
@@ -1135,10 +1138,7 @@ def test_custom_inputs_models_heavy(
11351138
test_case: ExpandableVLMTestArgs,
11361139
hf_runner: type[HfRunner],
11371140
vllm_runner: type[VllmRunner],
1138-
monkeypatch,
11391141
):
1140-
if model_type in REQUIRES_V0_MODELS:
1141-
monkeypatch.setenv("VLLM_USE_V1", "0")
11421142
model_test_info = VLM_TEST_SETTINGS[model_type]
11431143
runners.run_custom_inputs_test(
11441144
model_test_info=model_test_info,

0 commit comments

Comments
 (0)