 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import Fp8LinearOp
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer
 from vllm.utils.flashinfer import has_flashinfer
 from vllm.v1.kv_cache_interface import AttentionSpec
 
@@ -290,7 +289,6 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor):
     # quant_fp4 only has the custom impl
     + list(flat_product(BACKENDS_FP4, MODELS_FP4, [""])),
 )
-@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
 @pytest.mark.skipif(
     not current_platform.is_cuda_alike(), reason="Only test ROCm or CUDA"
 )
@@ -305,7 +303,6 @@ def test_attention_quant_pattern(
     model_name: str,
     model_class: type[AttentionQuantPatternModel],
     backend: _Backend,
-    use_inductor_graph_partition: bool,
     dist_init,
 ):
     """Test AttentionStaticQuantPattern fusion pass"""
@@ -314,10 +311,6 @@ def test_attention_quant_pattern(
     ):
         pytest.skip("FlashInfer attn fusion requires Blackwell and flashinfer")
 
-    # TODO(boyuan/luka): test inductor graph partition on rocm
-    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
-        pytest.skip("Inductor graph partition requires torch>=2.9")
-
     custom_ops_list = custom_ops.split(",") if custom_ops else []
 
     device = torch.device("cuda:0")
@@ -333,7 +326,6 @@ def test_attention_quant_pattern(
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             custom_ops=custom_ops_list,
-            use_inductor_graph_partition=use_inductor_graph_partition,
         ),
         cache_config=CacheConfig(cache_dtype="fp8"),
     )
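
For context, the use_inductor_graph_partition handling removed above followed a common pytest pattern: parametrize the flag, then skip the partitioned variant when the installed torch is too old. A minimal standalone sketch of that pattern is shown below; the test name and body are illustrative only and not part of the vLLM test suite.

    import pytest

    from vllm.utils import is_torch_equal_or_newer


    @pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
    def test_example_with_partition_flag(use_inductor_graph_partition: bool):
        # Inductor graph partition relies on torch.compile support that is only
        # available from torch 2.9 onward, so skip the partitioned variant on
        # older builds (same gate the diff above removes).
        if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
            pytest.skip("Inductor graph partition requires torch>=2.9")
        ...  # build the config with the flag and run the actual test body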