@@ -416,15 +416,16 @@ steps:
416416  - pytest -v -s compile/test_basic_correctness.py 
417417  - pytest -v -s compile/piecewise/ 
418418
419- - label : PyTorch Fullgraph Test  #  20min 
420-   timeout_in_minutes : 30 
419+ - label : PyTorch Fullgraph Test  #  22min 
420+   timeout_in_minutes : 35 
421421  mirror_hardwares : [amdexperimental] 
422422  torch_nightly : true 
423423  source_file_dependencies :
424424  - vllm/ 
425425  - tests/compile 
426426  commands :
427427  - pytest -v -s compile/test_full_graph.py 
428+   - pytest -v -s compile/test_fusions_e2e.py 
428429
429430- label : Kernels Core Operation Test  #  48min
430431  timeout_in_minutes : 75 
@@ -807,8 +808,8 @@ steps:
807808    #  Whisper needs spawn method to avoid deadlock
808809    - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper 
809810
810- - label : Blackwell Test  #  38  min
811-   timeout_in_minutes : 60 
811+ - label : Blackwell Test  #  21  min
812+   timeout_in_minutes : 30 
812813  working_dir : " /vllm-workspace/" 
813814  gpu : b200 
814815  #  optional: true
@@ -821,8 +822,6 @@ steps:
821822  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py 
822823  - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py 
823824  - vllm/v1/attention/backends/flashinfer.py 
824-   - vllm/compilation/fusion.py 
825-   - vllm/compilation/fusion_attn.py 
826825  commands :
827826    - nvidia-smi 
828827    - python3 examples/offline_inference/basic/chat.py 
@@ -839,15 +838,32 @@ steps:
839838    - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py 
840839    - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py 
841840    - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py 
841+     - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py 
842+     - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py 
842843    - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py 
843844    - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py 
844-     #  Fusion
845-     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
846-     - pytest -v -s tests/compile/test_fusion_attn.py::test_attention_quant_pattern 
847845    - pytest -v -s tests/kernels/moe/test_flashinfer.py 
846+ 
847+ - label : Blackwell Fusion Tests  #  30 min
848+   timeout_in_minutes : 40 
849+   working_dir : " /vllm-workspace/" 
850+   gpu : b200 
851+   source_file_dependencies :
852+   - csrc/quantization/fp4/ 
853+   - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py 
854+   - vllm/v1/attention/backends/flashinfer.py 
855+   - vllm/compilation/ 
856+   #  changes to the layer files below can affect fusion pattern matching
857+   - vllm/model_executor/layers/layernorm.py 
858+   - vllm/model_executor/layers/activation.py 
859+   - vllm/model_executor/layers/quantization/input_quant_fp8.py 
860+   commands :
861+     - nvidia-smi 
862+     - pytest -v -s tests/compile/test_fusion_attn.py 
848863    - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py 
849-     - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py 
850-     - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py 
864+     #  this runner has 2 GPUs available even though num_gpus is not set to 2 for this step
865+     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
866+     - pytest -v -s tests/compile/test_fusions_e2e.py 
851867
852868- label : Blackwell GPT-OSS Eval 
853869  timeout_in_minutes : 60 
@@ -1100,14 +1116,16 @@ steps:
11001116  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4 
11011117
11021118# #### H200 test #####
1103- - label : Distrubted  Tests (H200) #  optional
1119+ - label : Distributed  Tests (H200) #  optional
11041120  gpu : h200 
11051121  optional : true 
11061122  working_dir : " /vllm-workspace/" 
11071123  num_gpus : 2 
11081124  commands :
11091125    - pytest -v -s tests/compile/test_async_tp.py 
11101126    - pytest -v -s tests/compile/test_sequence_parallelism.py 
1127+     - pytest -v -s tests/compile/test_fusion_all_reduce.py 
1128+     - pytest -v -s tests/compile/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm 
11111129    - pytest -v -s tests/distributed/test_context_parallel.py 
11121130    - CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1  --dp-size=2 --max-model-len 2048 
11131131
0 commit comments