Commit c03b29b

Remove inductor graph partition from unit test (included in e2e tests)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>

1 parent ae581e1

File tree

1 file changed: +0 -8 lines changed

tests/compile/test_fusion_attn.py

Lines changed: 0 additions & 8 deletions
@@ -35,7 +35,6 @@
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import Fp8LinearOp
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer
 from vllm.utils.flashinfer import has_flashinfer
 from vllm.v1.kv_cache_interface import AttentionSpec
 
@@ -290,7 +289,6 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor):
     # quant_fp4 only has the custom impl
     + list(flat_product(BACKENDS_FP4, MODELS_FP4, [""])),
 )
-@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
 @pytest.mark.skipif(
     not current_platform.is_cuda_alike(), reason="Only test ROCm or CUDA"
 )
@@ -305,7 +303,6 @@ def test_attention_quant_pattern(
     model_name: str,
     model_class: type[AttentionQuantPatternModel],
     backend: _Backend,
-    use_inductor_graph_partition: bool,
     dist_init,
 ):
     """Test AttentionStaticQuantPattern fusion pass"""
@@ -314,10 +311,6 @@ def test_attention_quant_pattern(
     ):
         pytest.skip("FlashInfer attn fusion requires Blackwell and flashinfer")
 
-    # TODO(boyuan/luka): test inductor graph partition on rocm
-    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
-        pytest.skip("Inductor graph partition requires torch>=2.9")
-
     custom_ops_list = custom_ops.split(",") if custom_ops else []
 
     device = torch.device("cuda:0")
@@ -333,7 +326,6 @@
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             custom_ops=custom_ops_list,
-            use_inductor_graph_partition=use_inductor_graph_partition,
         ),
         cache_config=CacheConfig(cache_dtype="fp8"),
     )
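
With the parametrization removed, this unit test no longer toggles inductor graph partition; per the commit message, that path is covered by the e2e tests instead. For orientation, here is a minimal sketch of how an e2e-style test might enable the flag, reusing only names visible in this diff; the helper and its import paths are assumptions, not the actual e2e code:

# Hypothetical sketch, not the actual e2e test: enable inductor graph
# partition via the same CompilationConfig fields removed from the unit
# test above. Import paths are assumed.
import pytest

from vllm.config import CompilationConfig, CompilationMode, VllmConfig
from vllm.utils import is_torch_equal_or_newer


def build_graph_partition_config() -> VllmConfig:
    # Mirror the version guard the unit test used to apply:
    # inductor graph partition needs torch >= 2.9.
    if not is_torch_equal_or_newer("2.9.0.dev"):
        pytest.skip("Inductor graph partition requires torch>=2.9")
    return VllmConfig(
        compilation_config=CompilationConfig(
            mode=CompilationMode.VLLM_COMPILE,
            use_inductor_graph_partition=True,
        )
    )

Dropping the parameter halves this test's already large matrix without losing coverage of the partition path.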
