@@ -83,7 +83,7 @@ def models_list(*, all: bool = True, keywords: list[str] | None = None):
 
 @pytest.mark.parametrize(
     "optimization_level",
-    [CompilationMode.DYNAMO_TRACE_ONCE, CompilationMode.PIECEWISE],
+    [CompilationMode.DYNAMO_TRACE_ONCE, CompilationMode.VLLM_COMPILE],
 )
 @pytest.mark.parametrize("model_info", models_list(all=True))
 @create_new_process_for_each_test()
@@ -106,7 +106,7 @@ def test_full_graph(
     [
         # additional compile sizes, only some of the models
         (
-            CompilationConfig(level=CompilationMode.PIECEWISE, compile_sizes=[1, 2]),
+            CompilationConfig(level=CompilationMode.VLLM_COMPILE, compile_sizes=[1, 2]),
             model,
         )
         for model in models_list(all=False)
@@ -115,7 +115,7 @@ def test_full_graph(
         # RMSNorm + quant fusion, only 8-bit quant models
         (
             CompilationConfig(
-                level=CompilationMode.PIECEWISE,
+                level=CompilationMode.VLLM_COMPILE,
                 custom_ops=["+rms_norm"],
                 pass_config=PassConfig(enable_fusion=True, enable_noop=True),
             ),
@@ -127,7 +127,8 @@ def test_full_graph(
         # Test depyf integration works
         (
             CompilationConfig(
-                level=CompilationMode.PIECEWISE, debug_dump_path=tempfile.gettempdir()
+                level=CompilationMode.VLLM_COMPILE,
+                debug_dump_path=tempfile.gettempdir(),
             ),
             ("facebook/opt-125m", {}),
         ),
@@ -136,7 +137,7 @@ def test_full_graph(
         # graph inductor partition
         (
             CompilationConfig(
-                level=CompilationMode.PIECEWISE,
+                level=CompilationMode.VLLM_COMPILE,
                 # inductor graph partition uses
                 # torch._C.Tag.cudagraph_unsafe to specify splitting ops
                 use_inductor_graph_partition=True,
@@ -167,7 +168,7 @@ def test_custom_compile_config(
 
 @pytest.mark.parametrize(
     "optimization_level",
-    [CompilationMode.NO_COMPILATION, CompilationMode.PIECEWISE],
+    [CompilationMode.NO_COMPILATION, CompilationMode.VLLM_COMPILE],
)
 def test_fp8_kv_scale_compile(optimization_level: int):
     model = "Qwen/Qwen2-0.5B"
@@ -186,7 +187,7 @@ def test_inductor_graph_partition_attn_fusion(caplog_vllm):
 
     model = "nvidia/Llama-4-Scout-17B-16E-Instruct-FP8"
     compilation_config = CompilationConfig(
-        level=CompilationMode.PIECEWISE,
+        level=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=True,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         custom_ops=["+quant_fp8"],
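
For context beyond the diff itself, here is a minimal sketch of how the renamed mode would be passed when constructing an engine. The import paths for `CompilationConfig` and `CompilationMode` are assumptions inferred from the identifiers this test file uses, not something the diff confirms:

```python
# Minimal sketch (not part of this PR): passing the renamed compilation
# mode to an engine. Import paths below are assumptions; adjust them to
# the actual vllm package layout.
from vllm import LLM
from vllm.config import CompilationConfig, CompilationMode  # assumed paths

llm = LLM(
    model="facebook/opt-125m",  # same small model the depyf test above uses
    compilation_config=CompilationConfig(
        # VLLM_COMPILE is the new name for the old PIECEWISE mode
        level=CompilationMode.VLLM_COMPILE,
        compile_sizes=[1, 2],  # pre-compile these batch sizes, as in the test
    ),
)
outputs = llm.generate(["Hello, my name is"])
print(outputs[0].outputs[0].text)
```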