@@ -25,12 +25,13 @@
 import pytest
 import vllm  # noqa: F401
 from conftest import VllmRunner
+from vllm.assets.image import ImageAsset

 import vllm_ascend  # noqa: F401

-MODELS = [
-    "Qwen/Qwen2.5-0.5B-Instruct",
-]
+MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
+MULTIMODALITY_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
+
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"

@@ -53,6 +54,36 @@ def test_models(model: str, dtype: str, max_tokens: int) -> None:
         vllm_model.generate_greedy(example_prompts, max_tokens)


+@pytest.mark.parametrize("model", MULTIMODALITY_MODELS)
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "1",
+                    reason="qwen2.5_vl is not supported on v1")
+def test_multimodal(model: str, prompt_template, vllm_runner):
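+    # Smoke test: ask several questions about the same bundled test image
+    # and check that greedy decoding runs end-to-end on the multimodal path.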
+    image = ImageAsset("cherry_blossom") \
+        .pil_image.convert("RGB")
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
+    ]
+    images = [image] * len(img_questions)
+    prompts = prompt_template(img_questions)
+    with vllm_runner(model,
+                     max_model_len=4096,
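+                     # Qwen2-VL processor kwargs: min/max_pixels bound the
+                     # resized image resolution; fps applies to video inputs.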
+                     mm_processor_kwargs={
+                         "min_pixels": 28 * 28,
+                         "max_pixels": 1280 * 28 * 28,
+                         "fps": 1,
+                     }) as vllm_model:
+        vllm_model.generate_greedy(prompts=prompts,
+                                   images=images,
+                                   max_tokens=64)
+
+
 if __name__ == "__main__":
     import pytest
     pytest.main([__file__])