diff --git a/tests/compile/piecewise/test_toy_llama.py b/tests/compile/piecewise/test_toy_llama.py
index 175ca4a23043..6887673eb6a5 100644
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@@ -355,13 +355,13 @@ def test_toy_llama(
     )

     compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
         cudagraph_mode=CUDAGraphMode.NONE,
         backend="eager",
     )

     compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=use_inductor_graph_partition,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         backend=backend,
diff --git a/tests/compile/test_aot_compile.py b/tests/compile/test_aot_compile.py
index b2734af575a1..c65e5a25934d 100644
--- a/tests/compile/test_aot_compile.py
+++ b/tests/compile/test_aot_compile.py
@@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor):
 def make_vllm_config() -> VllmConfig:
     return VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
         )
     )

diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py
index c6fe65ab5146..4f782ef92c55 100644
--- a/tests/compile/test_config.py
+++ b/tests/compile/test_config.py
@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )
@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,
@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],
diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py
index 7a4e859b3e6c..0ad8c17d8668 100644
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [
diff --git a/tests/compile/test_fusions_e2e.py b/tests/compile/test_fusions_e2e.py
index efb5774b7870..50271e2a4d70 100644
--- a/tests/compile/test_fusions_e2e.py
+++ b/tests/compile/test_fusions_e2e.py
@@ -151,7 +151,7 @@ def test_attn_quant(
         cudagraph_mode=mode,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
         # Inductor caches custom passes by default as well via uuid
         inductor_compile_config={"force_disable_caches": True},
@@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
         custom_ops=custom_ops_list,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(
             enable_attn_fusion=True,
             enable_noop=True,
@@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [
diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py
index 254e9b3ab8af..41419553aa83 100644
--- a/tests/model_executor/test_enabled_custom_ops.py
+++ b/tests/model_executor/test_enabled_custom_ops.py
@@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):


 @pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
     [
         # Default values based on compile level
         # - All by default (no Inductor compilation)
@@ -77,7 +77,7 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
     env: str | None,
-    torch_level: int,
+    compilation_mode: int,
     backend: str,
     ops_enabled: list[int],
     default_on: bool,
@@ -85,7 +85,7 @@ def test_enabled_ops(
     custom_ops = env.split(",") if env else []
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
         )
     )
     with set_current_vllm_config(vllm_config):
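
For reference, a minimal usage sketch (not part of the diff) of the renamed keyword: `CompilationConfig(mode=...)` replaces `CompilationConfig(level=...)`, while the `CompilationMode` enum values stay the same. The import paths below are an assumption based on the names used in these tests and may need adjusting to the local vLLM version.

```python
# Sketch only: construct a VllmConfig with the renamed `mode` keyword.
# Import paths are assumed (vllm.config re-exports) and may differ by version.
from vllm.config import CompilationConfig, CompilationMode, CUDAGraphMode, VllmConfig

# Previously: CompilationConfig(level=CompilationMode.VLLM_COMPILE, ...)
config = VllmConfig(
    compilation_config=CompilationConfig(
        mode=CompilationMode.VLLM_COMPILE,
        cudagraph_mode=CUDAGraphMode.PIECEWISE,
    )
)
```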