@@ -111,39 +111,45 @@ def test_compile_correctness(
111111 with monkeypatch .context () as m :
112112 m .setenv ("VLLM_ATTENTION_BACKEND" , attn_backend )
113113 final_args = [
114- "--enforce-eager" ,
115114 * model_args ,
116115 "-pp" ,
117116 str (pp_size ),
118117 "-tp" ,
119118 str (tp_size ),
119+ "--compilation-config" ,
120+ '{"backend": "inductor", "cudagraph_mode": "none"}' ,
120121 ]
121122
122123 all_args : list [list [str ]] = []
123124 all_envs : list [dict [str , str ] | None ] = []
124125
125- for level in [
126- CompilationLevel .NO_COMPILATION ,
126+ for comp_level in [
127+ CompilationLevel .DYNAMO_AS_IS ,
128+ CompilationLevel .DYNAMO_ONCE ,
127129 CompilationLevel .PIECEWISE ,
128130 ]:
129- all_args .append (final_args + [f"-O{ level } " ])
130- all_envs .append ({})
131+ for level in [CompilationLevel .NO_COMPILATION , comp_level ]:
132+ all_args .append (final_args + [f"-O{ level } " ])
133+ all_envs .append ({})
131134
132- # inductor will change the output, so we only compare if the output
133- # is close, not exactly the same.
134- compare_all_settings (
135- model ,
136- all_args ,
137- all_envs ,
138- method = method if method != "generate" else "generate_close" ,
139- )
140- all_envs .clear ()
141- all_args .clear ()
135+ # inductor will change the output, so we only compare if the output
136+ # is close, not exactly the same.
137+ compare_all_settings (
138+ model ,
139+ all_args ,
140+ all_envs ,
141+ method = method if method != "generate" else "generate_close" ,
142+ )
143+ all_envs .clear ()
144+ all_args .clear ()
145+
146+ final_args [- 1 ] = '{"backend": "inductor", "cudagraph_mode": "none"}'
142147
143148 for level in [
144149 CompilationLevel .NO_COMPILATION ,
145150 CompilationLevel .DYNAMO_AS_IS ,
146151 CompilationLevel .DYNAMO_ONCE ,
152+ CompilationLevel .PIECEWISE ,
147153 ]:
148154 all_args .append (final_args + [f"-O{ level } " ])
149155 all_envs .append ({})
0 commit comments