76 changes: 56 additions & 20 deletions tests/conftest.py
@@ -19,6 +19,7 @@
import tempfile
import threading
from collections.abc import Generator
from contextlib import nullcontext
from enum import Enum
from typing import Any, Callable, Optional, TypedDict, TypeVar, Union, cast

@@ -45,14 +46,14 @@
from vllm.distributed import (cleanup_dist_env_and_memory,
init_distributed_environment,
initialize_model_parallel)
from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
to_enc_dec_tuple_list, zip_enc_dec_prompts)
from vllm.inputs import TextPrompt
from vllm.logger import init_logger
from vllm.multimodal.utils import fetch_image
from vllm.outputs import RequestOutput
from vllm.sampling_params import BeamSearchParams
from vllm.sequence import Logprob
from vllm.transformers_utils.utils import maybe_model_redirect
from vllm.utils import set_default_torch_num_threads

logger = init_logger(__name__)

@@ -306,6 +307,35 @@ def __init__(
is_cross_encoder: bool = False,
skip_tokenizer_init: bool = False,
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
# Set this to avoid hangs during initialization
default_torch_num_threads: Optional[int] = None,
) -> None:
init_ctx = (nullcontext() if default_torch_num_threads is None else
set_default_torch_num_threads(default_torch_num_threads))

with init_ctx:
self._init(
model_name=model_name,
dtype=dtype,
model_kwargs=model_kwargs,
trust_remote_code=trust_remote_code,
is_sentence_transformer=is_sentence_transformer,
is_cross_encoder=is_cross_encoder,
skip_tokenizer_init=skip_tokenizer_init,
auto_cls=auto_cls,
)

def _init(
self,
model_name: str,
dtype: str = "auto",
*,
model_kwargs: Optional[dict[str, Any]] = None,
trust_remote_code: bool = True,
is_sentence_transformer: bool = False,
is_cross_encoder: bool = False,
skip_tokenizer_init: bool = False,
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
) -> None:
model_name = maybe_model_redirect(model_name)
self.model_name = model_name
@@ -714,26 +744,32 @@ def __init__(
enable_chunked_prefill: Optional[bool] = False,
swap_space: int = 4,
enforce_eager: Optional[bool] = False,
# Set this to avoid hangs during initialization
default_torch_num_threads: Optional[int] = None,
**kwargs,
) -> None:
self.llm = LLM(
model=model_name,
runner=runner,
convert=convert,
tokenizer=tokenizer_name,
tokenizer_mode=tokenizer_mode,
trust_remote_code=trust_remote_code,
dtype=dtype,
seed=seed,
swap_space=swap_space,
enforce_eager=enforce_eager,
disable_log_stats=disable_log_stats,
tensor_parallel_size=tensor_parallel_size,
max_model_len=max_model_len,
block_size=block_size,
enable_chunked_prefill=enable_chunked_prefill,
**kwargs,
)
init_ctx = (nullcontext() if default_torch_num_threads is None else
set_default_torch_num_threads(default_torch_num_threads))

with init_ctx:
self.llm = LLM(
model=model_name,
runner=runner,
convert=convert,
tokenizer=tokenizer_name,
tokenizer_mode=tokenizer_mode,
trust_remote_code=trust_remote_code,
dtype=dtype,
seed=seed,
swap_space=swap_space,
enforce_eager=enforce_eager,
disable_log_stats=disable_log_stats,
tensor_parallel_size=tensor_parallel_size,
max_model_len=max_model_len,
block_size=block_size,
enable_chunked_prefill=enable_chunked_prefill,
**kwargs,
)

def get_inputs(
self,
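Aside: set_default_torch_num_threads (imported from vllm.utils above) is applied as a context manager around model construction, capping torch's CPU thread count during init and restoring the previous value afterwards; when the new argument is None, nullcontext() makes the wrapper a no-op. A minimal sketch of that save/restore pattern, assuming the standard torch.get_num_threads / torch.set_num_threads API; the real helper in vllm.utils may be implemented differently:

from collections.abc import Iterator
from contextlib import contextmanager

import torch


@contextmanager
def set_default_torch_num_threads(num_threads: int) -> Iterator[None]:
    # Remember the current thread count so it can be restored.
    old_num_threads = torch.get_num_threads()
    torch.set_num_threads(num_threads)
    try:
        yield
    finally:
        # Always restore, even if model construction raises.
        torch.set_num_threads(old_num_threads)
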
158 changes: 79 additions & 79 deletions tests/models/multimodal/generation/test_common.py
@@ -32,13 +32,6 @@
if current_platform.is_rocm():
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"

REQUIRES_V0_MODELS = [
# V1 Test: not enough KV cache space in C1.
"fuyu",
# V1 Test: Deadlock issue when processing mm_inputs
"llava-onevision-transformers",
]

# yapf: disable
COMMON_BROADCAST_SETTINGS = {
"test_type": VLMTestType.IMAGE,
@@ -186,8 +179,11 @@
image_size_factors=[(0.25, 0.5, 1.0)],
vllm_runner_kwargs={
"model_impl": "transformers",
"default_torch_num_threads": 1,
},
marks=[pytest.mark.core_model],
# FIXME: Investigate why the test hangs
# when processing the 3rd prompt in vLLM
marks=[pytest.mark.core_model, pytest.mark.skip(reason="Test hangs")],
),
"idefics3-transformers": VLMTestInfo(
models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
@@ -320,6 +316,7 @@
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
num_logprobs=10,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[large_gpu_mark(min_gb=32)],
),
"gemma3": VLMTestInfo(
models=["google/gemma-3-4b-it"],
@@ -861,13 +858,14 @@ def _mark_splits(
test_type=VLMTestType.IMAGE,
create_new_process_for_each_test=False,
))
def test_single_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_single_image_models(
tmp_path: PosixPath,
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test(
tmp_path=tmp_path,
@@ -886,13 +884,14 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
test_type=VLMTestType.MULTI_IMAGE,
create_new_process_for_each_test=False,
))
def test_multi_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_multi_image_models(
tmp_path: PosixPath,
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test(
tmp_path=tmp_path,
@@ -911,13 +910,13 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
test_type=VLMTestType.EMBEDDING,
create_new_process_for_each_test=False,
))
def test_image_embedding_models(model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_image_embedding_models(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test(
model_test_info=model_test_info,
@@ -935,11 +934,13 @@ def test_image_embedding_models(model_type: str,
test_type=VLMTestType.VIDEO,
create_new_process_for_each_test=False,
))
def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
video_assets: VideoTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_video_models(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
video_assets: VideoTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test(
model_test_info=model_test_info,
@@ -957,11 +958,13 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
test_type=VLMTestType.AUDIO,
create_new_process_for_each_test=False,
))
def test_audio_models(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
audio_assets: AudioTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_audio_models(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
audio_assets: AudioTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_audio_test(
model_test_info=model_test_info,
@@ -984,10 +987,7 @@ def test_custom_inputs_models(
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
monkeypatch,
):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test(
model_test_info=model_test_info,
@@ -1006,13 +1006,14 @@
create_new_process_for_each_test=True,
))
@create_new_process_for_each_test()
def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_single_image_models_heavy(
tmp_path: PosixPath,
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test(
tmp_path=tmp_path,
@@ -1032,13 +1033,14 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
create_new_process_for_each_test=True,
))
@create_new_process_for_each_test()
def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_multi_image_models_heavy(
tmp_path: PosixPath,
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test(
tmp_path=tmp_path,
@@ -1058,14 +1060,13 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
create_new_process_for_each_test=True,
))
@create_new_process_for_each_test()
def test_image_embedding_models_heavy(model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_image_embedding_models_heavy(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: ImageTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test(
model_test_info=model_test_info,
@@ -1083,12 +1084,13 @@ def test_image_embedding_models_heavy(model_type: str,
test_type=VLMTestType.VIDEO,
create_new_process_for_each_test=True,
))
def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
video_assets: VideoTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_video_models_heavy(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
video_assets: VideoTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test(
model_test_info=model_test_info,
@@ -1106,12 +1108,13 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
test_type=VLMTestType.AUDIO,
create_new_process_for_each_test=True,
))
def test_audio_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
audio_assets: AudioTestAssets, monkeypatch):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
def test_audio_models_heavy(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
audio_assets: AudioTestAssets,
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_audio_test(
model_test_info=model_test_info,
@@ -1135,10 +1138,7 @@ def test_custom_inputs_models_heavy(
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
monkeypatch,
):
if model_type in REQUIRES_V0_MODELS:
monkeypatch.setenv("VLLM_USE_V1", "0")
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test(
model_test_info=model_test_info,
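Hypothetical usage from a test, showing how the new knob reaches the runner; the model name, prompt, and parameter values are placeholders, not values pinned by this PR:

def test_example(vllm_runner: type[VllmRunner]):
    # default_torch_num_threads=1 makes VllmRunner wrap LLM construction in
    # set_default_torch_num_threads(1), limiting torch to a single CPU thread
    # during engine init to avoid the hang; generation afterwards is unaffected.
    with vllm_runner(
            "some/model",  # placeholder model id
            default_torch_num_threads=1,
    ) as vllm_model:
        outputs = vllm_model.generate_greedy(["Hello"], max_tokens=8)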