Skip to content

Commit 31d0127

Browse files
committed
Add e2e fusions to fullgraph test (should work with Triton backend), disable without flashinfer
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
1 parent 4dbfcf7 commit 31d0127

File tree

2 files changed

+4
-5
lines changed

2 files changed

+4
-5
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,15 +416,16 @@ steps:
416416
- pytest -v -s compile/test_basic_correctness.py
417417
- pytest -v -s compile/piecewise/
418418

419-
- label: PyTorch Fullgraph Test # 20min
420-
timeout_in_minutes: 30
419+
- label: PyTorch Fullgraph Test # 22min
420+
timeout_in_minutes: 35
421421
mirror_hardwares: [amdexperimental]
422422
torch_nightly: true
423423
source_file_dependencies:
424424
- vllm/
425425
- tests/compile
426426
commands:
427427
- pytest -v -s compile/test_full_graph.py
428+
- pytest -v -s compile/test_fusions_e2e.py
428429

429430
- label: Kernels Core Operation Test # 48min
430431
timeout_in_minutes: 75

tests/compile/test_fusions_e2e.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
)
3434
]
3535

36-
if current_platform.is_device_capability((10, 0)):
36+
if current_platform.is_device_capability((10, 0)) and has_flashinfer():
3737
MODELS_FP8 += [
3838
(
3939
"nvidia/Llama-4-Scout-17B-16E-Instruct-FP8",
@@ -97,7 +97,6 @@ def test_attn_quant(
9797

9898
# Disable the compile cache to make sure custom passes run.
9999
# Otherwise, we can't verify fusion happened through the logs.
100-
# Log capture also doesn't work with multiprocessing yet.
101100
monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
102101

103102
# To capture subprocess logs, we need to know whether spawn or fork is used.
@@ -170,7 +169,6 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
170169

171170
# Disable the compile cache to make sure custom passes run.
172171
# Otherwise, we can't verify fusion happened through the logs.
173-
# Log capture also doesn't work with multiprocessing yet.
174172
monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
175173

176174
# To capture subprocess logs, we need to know whether spawn or fork is used.

0 commit comments

Comments
 (0)