@@ -138,21 +138,23 @@ steps:
   - examples/offline_inference/rlhf.py
   - examples/offline_inference/rlhf_colocate.py
   - tests/examples/offline_inference/data_parallel.py
+  - tests/v1/test_async_llm_dp.py
   commands:
   # test with tp=2 and external_dp=2
   - VLLM_USE_V1=0 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
   - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
   # test with internal dp
   - python3 ../examples/offline_inference/data_parallel.py
+  - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
   - pytest -v -s distributed/test_utils.py
   - pytest -v -s compile/test_basic_correctness.py
   - pytest -v -s distributed/test_pynccl.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
   # TODO: create a dedicated test section for multi-GPU example tests
   # when we have multiple distributed example tests
   - pushd ../examples/offline_inference
-  - VLLM_ENABLE_V1_MULTIPROCESSING=0 python3 rlhf.py
-  - VLLM_ENABLE_V1_MULTIPROCESSING=0 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
+  - python3 rlhf.py
+  - RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
   - popd

 - label: Metrics, Tracing Test  # 10min
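
Note on the new entry: the pipeline runs from the tests/ directory (hence the ../examples paths above), and tests/v1/test_async_llm_dp.py is driven by the TP_SIZE and DP_SIZE environment variables shown in the command. How the test consumes those variables is an assumption here, not confirmed by this diff; a minimal local reproduction on a 4-GPU host would be:

    # sketch: reproduce the new data-parallel step locally
    # (assumes a vLLM checkout and 4 visible GPUs)
    cd tests
    TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py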
@@ -295,7 +297,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
   parallelism: 4

 - label: PyTorch Fullgraph Smoke Test  # 9min
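
For readers unfamiliar with the sharding flags: parallelism: 4 makes Buildkite launch four copies of this step, and the doubled $$ defers expansion to the agent, where BUILDKITE_PARALLEL_JOB (0-3) and BUILDKITE_PARALLEL_JOB_COUNT (4) are set. The --shard-id/--num-shards options come from a pytest sharding plugin such as pytest-shard (an assumption; the plugin is not named in this diff). On job 1, the command expands roughly to:

    # sketch: effective command on parallel job 1 of 4
    pytest -v -s lora --shard-id=1 --num-shards=4 \
      --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py \
      --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py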
@@ -441,6 +443,7 @@ steps:
     - pytest -v -s models/encoder_decoder/audio_language -m core_model
     - pytest -v -s models/encoder_decoder/language -m core_model
     - pytest -v -s models/encoder_decoder/vision_language -m core_model
+    - pytest -v -s models/decoder_only/vision_language/test_interleaved.py

 - label: Multi-Modal Models Test (Extended) 1  # 48m
   optional: true
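
The -m core_model filter on the surrounding commands selects only tests carrying pytest's core_model marker; the newly added test_interleaved.py line has no marker filter, so every test in that file runs. Equivalent invocations, for comparison:

    # marker-filtered: only tests marked @pytest.mark.core_model run
    pytest -v -s models/encoder_decoder/language -m core_model
    # unfiltered: the whole new file runs
    pytest -v -s models/decoder_only/vision_language/test_interleaved.py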
@@ -526,8 +529,11 @@ steps:
   - vllm/worker/worker.py
   - vllm/worker/model_runner.py
   - entrypoints/llm/test_collective_rpc.py
+  - tests/v1/test_async_llm_dp.py
+  - vllm/v1/engine/
   commands:
-  - VLLM_ENABLE_V1_MULTIPROCESSING=0 pytest -v -s entrypoints/llm/test_collective_rpc.py
+  - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py
+  - pytest -v -s entrypoints/llm/test_collective_rpc.py
   - pytest -v -s ./compile/test_basic_correctness.py
   - pytest -v -s ./compile/test_wrapper.py
   - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed'
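
This hunk mirrors the 4-GPU hunk above at 2-GPU scale (TP_SIZE=1, DP_SIZE=2) and drops the VLLM_ENABLE_V1_MULTIPROCESSING=0 override from test_collective_rpc.py. Also worth noting: because the last command's exit status is grep's, the torchrun step passes only when the marker line is actually printed:

    # sketch: the grep gate fails the step unless the marker line appears
    VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py \
      | grep 'Same node test passed'
    echo $?   # 0 only if the test printed the marker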
@@ -604,8 +610,6 @@ steps:
     # FIXIT: find out which code initialize cuda before running the test
     # before the fix, we need to use spawn to test it
     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-    # This test runs llama 13B, so it is required to run on 4 GPUs.
-    - pytest -v -s -x lora/test_long_context.py
     # There is some Tensor Parallelism related processing logic in LoRA that
     # requires multi-GPU testing for validation.
     - pytest -v -s -x lora/test_chatglm3_tp.py