
Commit 3889968

add model usability test
Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent 96d6fa7

6 files changed, +84 −2 lines changed

.github/workflows/vllm_ascend_test.yaml

Lines changed: 1 addition & 0 deletions
@@ -120,6 +120,7 @@ jobs:
       - name: Run vllm-project/vllm-ascend test on V0 engine
         env:
           VLLM_USE_V1: 0
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
             pytest -sv tests/singlecard/test_offline_inference.py
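Note (not part of the commit): VLLM_WORKER_MULTIPROC_METHOD=spawn only takes effect if it is set before the worker processes are created. A rough local reproduction of this CI step is sketched below; the environment (an Ascend NPU host with vllm and vllm-ascend installed) and driving pytest from a script are assumptions, not something the workflow itself prescribes.

import os
import pytest

# Mirror the CI environment from the workflow step above.
os.environ["VLLM_USE_V1"] = "0"                       # V0 engine, as in the CI job
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"  # variable added by this commit

# Same test file the workflow runs on the single-NPU runner.
pytest.main(["-sv", "tests/singlecard/test_offline_inference.py"])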

tests/conftest.py

Lines changed: 9 additions & 1 deletion
@@ -17,6 +17,7 @@
 # Adapted from vllm-project/vllm/blob/main/tests/conftest.py
 #
 
+import contextlib
 import gc
 from typing import List, Optional, Tuple, TypeVar, Union
 
@@ -31,7 +32,7 @@
 from vllm.sampling_params import BeamSearchParams
 from vllm.utils import is_list_of
 
-from tests.model_utils import (TokensTextLogprobs,
+from tests.model_utils import (PROMPT_TEMPLATES, TokensTextLogprobs,
                                TokensTextLogprobsPromptLogprobs)
 # TODO: remove this part after the patch merged into vllm, if
 # we not explicitly patch here, some of them might be effectiveless
@@ -55,6 +56,8 @@
 def cleanup_dist_env_and_memory():
     destroy_model_parallel()
     destroy_distributed_environment()
+    with contextlib.suppress(AssertionError):
+        torch.distributed.destroy_process_group()
     gc.collect()
     torch.npu.empty_cache()
 
@@ -344,3 +347,8 @@ def __exit__(self, exc_type, exc_value, traceback):
 @pytest.fixture(scope="session")
 def vllm_runner():
     return VllmRunner
+
+
+@pytest.fixture(params=list(PROMPT_TEMPLATES.keys()))
+def prompt_template(request):
+    return PROMPT_TEMPLATES[request.param]
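The new prompt_template fixture is parametrized over every key in PROMPT_TEMPLATES, so any test that accepts prompt_template as an argument runs once per registered template. A minimal sketch of an equivalent explicit parametrization (illustration only, not code from this commit; the test name and assertion are made up):

import pytest

from tests.model_utils import PROMPT_TEMPLATES


@pytest.mark.parametrize("template_name", list(PROMPT_TEMPLATES.keys()))
def test_template_renders_one_prompt_per_question(template_name):
    template = PROMPT_TEMPLATES[template_name]
    prompts = template(["What is the content of this image?"])
    # Each template is expected to return one rendered prompt per question.
    assert len(prompts) == 1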

tests/model_utils.py

Lines changed: 14 additions & 1 deletion
@@ -18,7 +18,7 @@
 #
 
 import warnings
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union
 
 import torch
 from vllm.config import ModelConfig, TaskOption
@@ -301,3 +301,16 @@ def build_model_context(model_name: str,
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
     return InputContext(model_config)
+
+
+def qwen_prompt(questions: List[str]) -> List[str]:
+    placeholder = "<|image_pad|>"
+    return [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+             f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+             f"{q}<|im_end|>\n<|im_start|>assistant\n") for q in questions]
+
+
+# Map of prompt templates for different models.
+PROMPT_TEMPLATES: dict[str, Callable] = {
+    "qwen2.5vl": qwen_prompt,
+}
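For reference, rendering a single question through the new qwen2.5vl template produces the Qwen2.5-VL chat layout shown below (an illustration of the function above, not code from the commit):

from tests.model_utils import PROMPT_TEMPLATES

prompts = PROMPT_TEMPLATES["qwen2.5vl"](["What is the content of this image?"])
print(prompts[0])
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>What is the content of this image?<|im_end|>
# <|im_start|>assistant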

tests/multicard/test_offline_inference_distributed.py

Lines changed: 30 additions & 0 deletions
@@ -26,12 +26,14 @@
 import vllm  # noqa: F401
 
 from tests.conftest import VllmRunner
+from vllm.assets.image import ImageAsset
 
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
 
 @pytest.mark.parametrize("model, distributed_executor_backend", [
     ("Qwen/QwQ-32B", "mp"),
+    ("deepseek-ai/DeepSeek-V2-Lite", "mp"),
 ])
 def test_models_distributed(model: str,
                             distributed_executor_backend: str) -> None:
@@ -51,6 +53,34 @@ def test_models_distributed(model: str,
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
+@pytest.mark.parametrize("model", ["Qwen/Qwen2.5-VL-32B-Instruct"])
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
+                    reason="qwen2.5_vl is not supported on v1")
+def test_multimodal(model: str, prompt_template, vllm_runner):
+    image = ImageAsset("cherry_blossom") \
+        .pil_image.convert("RGB")
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
+    ]
+    images = [image] * len(img_questions)
+    prompts = prompt_template(img_questions)
+    with vllm_runner(model,
+                     max_model_len=4096,
+                     tensor_parallel_size=4,
+                     distributed_executor_backend="mp",
+                     mm_processor_kwargs={
+                         "min_pixels": 28 * 28,
+                         "max_pixels": 1280 * 28 * 28,
+                         "fps": 1,
+                     }) as vllm_model:
+        vllm_model.generate_greedy(prompts=prompts,
+                                   images=images,
+                                   max_tokens=64)
+
+
 if __name__ == "__main__":
     import pytest
     pytest.main([__file__])

tests/ops/test_rotary_embedding.py

Lines changed: 1 addition & 0 deletions
@@ -202,3 +202,4 @@ def test_rotary_embedding_quant_with_leading_dim(
                                ref_key,
                                atol=DEFAULT_ATOL,
                                rtol=DEFAULT_RTOL)
+    torch.npu.empty_cache()
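The added torch.npu.empty_cache() call releases cached NPU memory once this test finishes. If the same cleanup were wanted after every test in a module, one alternative (a sketch assuming torch_npu is installed, as these tests require) would be an autouse fixture instead of a call at the end of each test:

import pytest
import torch


@pytest.fixture(autouse=True)
def free_npu_cache_after_test():
    yield
    # Release cached NPU memory between tests to reduce fragmentation/OOM risk.
    torch.npu.empty_cache()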

tests/singlecard/test_offline_inference.py

Lines changed: 29 additions & 0 deletions
@@ -24,14 +24,17 @@
 
 import pytest
 import vllm  # noqa: F401
+from vllm.assets.image import ImageAsset
 
 import vllm_ascend  # noqa: F401
 from tests.conftest import VllmRunner
+from vllm.assets.image import ImageAsset
 
 MODELS = [
     "Qwen/Qwen2.5-0.5B-Instruct",
     "vllm-ascend/Qwen2.5-0.5B-Instruct-w8a8",
 ]
+MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
@@ -55,6 +58,32 @@ def test_models(model: str, dtype: str, max_tokens: int) -> None:
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
+@pytest.mark.parametrize("model", MULTIMODALITY_MODELS)
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
+                    reason="qwen2.5_vl is not supported on v1")
+def test_multimodal(model: str, prompt_template, vllm_runner):
+    image = ImageAsset("cherry_blossom") \
+        .pil_image.convert("RGB")
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
+    ]
+    images = [image] * len(img_questions)
+    prompts = prompt_template(img_questions)
+    with vllm_runner(model,
+                     max_model_len=4096,
+                     mm_processor_kwargs={
+                         "min_pixels": 28 * 28,
+                         "max_pixels": 1280 * 28 * 28,
+                         "fps": 1,
+                     }) as vllm_model:
+        vllm_model.generate_greedy(prompts=prompts,
+                                   images=images,
+                                   max_tokens=64)
+
+
 if __name__ == "__main__":
     import pytest
     pytest.main([__file__])
