@@ -40,10 +40,23 @@ class PPTestOptions(NamedTuple):
 @dataclass
 class PPTestSettings:
     parallel_setups: List[ParallelSetup]
+    # NOTE: the length of distributed_backends and
+    # vllm_major_versions should be the same, and they
+    # are first zipped together to iterate over all
+    # test settings.
     distributed_backends: List[str]
+    # vllm major version: "0" for V0, "1" for V1
+    vllm_major_versions: List[str]
     task: TaskOption
     test_options: PPTestOptions
 
+    def __post_init__(self):
+        if len(self.distributed_backends) != len(self.vllm_major_versions):
+            raise ValueError(
+                f"Length mismatch: distributed_backends "
+                f"({len(self.distributed_backends)}) != "
+                f"vllm_major_versions ({len(self.vllm_major_versions)})")
+
     @staticmethod
     def detailed(
         *,
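
For context on the validation added above: Python dataclasses invoke
__post_init__ automatically after the generated __init__, so a mismatched
pair of lists fails at construction time rather than partway through a test
run. A minimal standalone sketch of the pattern (toy class names, not the
vLLM code):

    from dataclasses import dataclass
    from typing import List

    @dataclass
    class Paired:
        backends: List[str]
        versions: List[str]

        def __post_init__(self):
            # Runs right after the generated __init__.
            if len(self.backends) != len(self.versions):
                raise ValueError("length mismatch")

    Paired(backends=["mp"], versions=["0"])  # constructs fine
    try:
        Paired(backends=["mp", "ray"], versions=["0"])
    except ValueError:
        print("caught mismatch at construction time")
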
@@ -79,7 +92,9 @@ def detailed(
                               eager_mode=True,
                               chunked_prefill=False),
             ],
-            distributed_backends=["mp", "ray"],
+            # only ray is supported for V1
+            distributed_backends=["mp", "ray", "ray"],
+            vllm_major_versions=["0", "0", "1"],
             task=task,
             test_options=PPTestOptions(multi_node_only=multi_node_only,
                                        trust_remote_code=trust_remote_code,
@@ -108,6 +123,7 @@ def fast(
                               chunked_prefill=False),
             ],
             distributed_backends=["mp"],
+            vllm_major_versions=["0"],
             task=task,
             test_options=PPTestOptions(multi_node_only=multi_node_only,
                                        trust_remote_code=trust_remote_code,
@@ -120,8 +136,9 @@ def iter_params(self, model_name: str):
         opts = self.test_options
 
         for parallel_setup in self.parallel_setups:
-            for distributed_backend in self.distributed_backends:
-                yield (model_name, parallel_setup, distributed_backend,
+            for backend, vllm_major_version in zip(self.distributed_backends,
+                                                   self.vllm_major_versions):
+                yield (model_name, parallel_setup, backend, vllm_major_version,
                        self.task, opts)
 
 
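
The zip above pairs each backend with exactly one vLLM major version, so
with the detailed() lists the Ray backend is exercised under both V0 and V1
while mp stays on V0. A quick illustration of the pairing (plain Python):

    backends = ["mp", "ray", "ray"]
    versions = ["0", "0", "1"]
    assert list(zip(backends, versions)) == [
        ("mp", "0"), ("ray", "0"), ("ray", "1")
    ]
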
@@ -244,6 +261,7 @@ def _compare_tp(
     model_name: str,
     parallel_setup: ParallelSetup,
     distributed_backend: str,
+    vllm_major_version: str,
     task: TaskOption,
     test_options: PPTestOptions,
     num_gpus_available: int,
@@ -296,10 +314,13 @@ def _compare_tp(
     if hf_overrides:
         common_args.extend(["--hf-overrides", hf_overrides])
 
-    if (distributed_backend == "ray" and tp_size == 2 and pp_size == 2
-            and chunked_prefill):
-        # Test Ray ADAG for a subset of the tests
+    specific_case = tp_size == 2 and pp_size == 2 and chunked_prefill
+    if distributed_backend == "ray" and (vllm_major_version == "1"
+                                         or specific_case):
+        # For V1, test Ray ADAG for all the tests
+        # For V0, test Ray ADAG for a subset of the tests
         pp_env = {
+            "VLLM_USE_V1": vllm_major_version,
             "VLLM_USE_RAY_COMPILED_DAG": "1",
             "VLLM_USE_RAY_SPMD_WORKER": "1",
             "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
@@ -348,8 +369,8 @@ def _compare_tp(
 
 
 @pytest.mark.parametrize(
-    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "test_options"),
+    ("model_name", "parallel_setup", "distributed_backend",
+     "vllm_major_version", "task", "test_options"),
     [
         params for model_name, settings in TEXT_GENERATION_MODELS.items()
         for params in settings.iter_params(model_name)
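
Because iter_params now yields six-tuples, each parametrize decorator gains
"vllm_major_version" in the matching position. A toy version of the pattern
(hypothetical test, requires pytest):

    import pytest

    @pytest.mark.parametrize(
        ("backend", "vllm_major_version"),
        [("mp", "0"), ("ray", "0"), ("ray", "1")],
    )
    def test_pairing(backend: str, vllm_major_version: str):
        # pytest generates one test case per (backend, version) pair
        assert vllm_major_version in ("0", "1")
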
@@ -361,22 +382,24 @@ def test_tp_language_generation(
     model_name: str,
     parallel_setup: ParallelSetup,
     distributed_backend: str,
+    vllm_major_version: str,
     task: TaskOption,
     test_options: PPTestOptions,
     num_gpus_available,
 ):
     _compare_tp(model_name,
                 parallel_setup,
                 distributed_backend,
+                vllm_major_version,
                 task,
                 test_options,
                 num_gpus_available,
                 method="generate")
 
 
 @pytest.mark.parametrize(
-    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "test_options"),
+    ("model_name", "parallel_setup", "distributed_backend",
+     "vllm_major_version", "task", "test_options"),
     [
         params for model_name, settings in EMBEDDING_MODELS.items()
         for params in settings.iter_params(model_name)
@@ -388,22 +411,24 @@ def test_tp_language_embedding(
     model_name: str,
     parallel_setup: ParallelSetup,
     distributed_backend: str,
+    vllm_major_version: str,
     task: TaskOption,
     test_options: PPTestOptions,
     num_gpus_available,
 ):
     _compare_tp(model_name,
                 parallel_setup,
                 distributed_backend,
+                vllm_major_version,
                 task,
                 test_options,
                 num_gpus_available,
                 method="encode")
 
 
 @pytest.mark.parametrize(
-    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "test_options"),
+    ("model_name", "parallel_setup", "distributed_backend",
+     "vllm_major_version", "task", "test_options"),
     [
         params for model_name, settings in MULTIMODAL_MODELS.items()
         for params in settings.iter_params(model_name)
@@ -415,13 +440,15 @@ def test_tp_multimodal_generation(
     model_name: str,
     parallel_setup: ParallelSetup,
     distributed_backend: str,
+    vllm_major_version: str,
     task: TaskOption,
     test_options: PPTestOptions,
     num_gpus_available,
 ):
     _compare_tp(model_name,
                 parallel_setup,
                 distributed_backend,
+                vllm_major_version,
                 task,
                 test_options,
                 num_gpus_available,