@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )
@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,
@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],
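
For reference, here is a minimal sketch of constructing the compilation config with the renamed `mode` keyword, mirroring the first hunk above; the import paths are assumptions and may differ across vLLM versions.

```python
# Minimal sketch mirroring the test change above; import paths are
# assumptions and may differ between vLLM versions.
from vllm.config import CompilationConfig, CompilationMode, VllmConfig

config = VllmConfig(
    compilation_config=CompilationConfig(
        # `mode` replaces the old `level` keyword for selecting the
        # compilation mode.
        mode=CompilationMode.VLLM_COMPILE,
        use_inductor_graph_partition=True,
        splitting_ops=["vllm::unified_attention"],
    )
)
assert config.compilation_config.mode == CompilationMode.VLLM_COMPILE
```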