@@ -41,6 +41,16 @@ steps:
4141 # TODO: add `--strict` once warnings in docstrings are fixed
4242 - mkdocs build
4343
44+ - label : Pytorch Nightly Dependency Override Check # 2min
45+ # if this test fails, it means the nightly torch version is not compatible with some
46+ # of the dependencies. Please check the error message and add the package to the whitelist
47+ # in /vllm/tools/generate_nightly_torch_test.py
48+ soft_fail : true
49+ source_file_dependencies :
50+ - requirements/nightly_torch_test.txt
51+ commands :
52+ - bash standalone_tests/pytorch_nightly_dependency.sh
53+
4454- label : Async Engine, Inputs, Utils, Worker Test # 24min
4555 mirror_hardwares : [amdexperimental]
4656 source_file_dependencies :
8999 - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
90100
91101- label : Chunked Prefill Test
92- mirror_hardwares : [amdexperimental]
102+ mirror_hardwares : [amdexperimental, amdproduction]
93103 source_file_dependencies :
94104 - vllm/
95105 - tests/basic_correctness/test_chunked_prefill
@@ -168,6 +178,23 @@ steps:
168178 - VLLM_ALLOW_INSECURE_SERIALIZATION=1 RAY_DEDUP_LOGS=0 python3 rlhf_colocate.py
169179 - popd
170180
181+ - label : EPLB Algorithm Test
182+ working_dir : " /vllm-workspace/tests"
183+ source_file_dependencies :
184+ - vllm/distributed/eplb
185+ - tests/distributed/test_eplb_algo.py
186+ commands :
187+ - pytest -v -s distributed/test_eplb_algo.py
188+
189+ - label : EPLB Execution Test # 5min
190+ working_dir : " /vllm-workspace/tests"
191+ num_gpus : 4
192+ source_file_dependencies :
193+ - vllm/distributed/eplb
194+ - tests/distributed/test_eplb_execute.py
195+ commands :
196+ - pytest -v -s distributed/test_eplb_execute.py
197+
171198- label : Metrics, Tracing Test # 10min
172199 mirror_hardwares : [amdexperimental, amdproduction]
173200 num_gpus : 2
@@ -271,6 +298,15 @@ steps:
271298 commands :
272299 - pytest -v -s prefix_caching
273300
301+
302+ - label : Platform Tests (CUDA)
303+ mirror_hardwares : [amdexperimental]
304+ source_file_dependencies :
305+ - vllm/
306+ - tests/cuda
307+ commands :
308+ - pytest -v -s cuda/test_cuda_context.py
309+
274310- label : Samplers Test # 36min
275311 mirror_hardwares : [amdexperimental]
276312 source_file_dependencies :
@@ -606,13 +642,18 @@ steps:
606642 - vllm/executor/
607643 - vllm/model_executor/models/
608644 - tests/distributed/
645+ - tests/examples/offline_inference/data_parallel.py
609646 commands :
610647 - # the following commands are for the first node, with ip 192.168.10.10 (ray environment already set up)
611648 - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
649+ - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
650+ - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=0 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
612651 - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py
613652 - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py
614653 - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
615654 - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
655+ - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
656+ - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
616657
617658- label : Distributed Tests (2 GPUs) # 40min
618659 mirror_hardwares : [amdexperimental]
@@ -736,7 +777,7 @@ steps:
736777 - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
737778
738779- label : Weight Loading Multiple GPU Test - Large Models # optional
739- mirror_hardwares : [amdexperimental]
780+ mirror_hardwares : [amdexperimental]
740781 working_dir : " /vllm-workspace/tests"
741782 num_gpus : 2
742783 gpu : a100
0 commit comments