@@ -163,6 +163,23 @@ def get_inductor_factors() -> list[Any]:
163163 return factors
164164
165165
def is_compile_cache_enabled(
    vllm_additional_inductor_config: dict[str, Any],
) -> bool:
    """Return True when the torch compilation cache may be used.

    The cache is enabled only if none of the three disable switches is set:
    the ``VLLM_DISABLE_COMPILE_CACHE`` environment flag, Inductor's global
    ``force_disable_caches`` config, or a ``"force_disable_caches"`` entry
    in the vLLM-supplied additional Inductor config (absent entries are
    treated as False, i.e. not disabled).
    """
    # TODO(gmagogsfm): Replace torch._inductor.config.force_disable_caches
    # with torch.compiler.config.force_disable_caches when minimum PyTorch
    # version reaches 2.10
    disable_switches = (
        envs.VLLM_DISABLE_COMPILE_CACHE,
        torch._inductor.config.force_disable_caches,
        vllm_additional_inductor_config.get("force_disable_caches", False),
    )
    return not any(disable_switches)
181+
182+
166183class InductorStandaloneAdaptor (CompilerInterface ):
167184 """
168185 The adaptor for the Inductor compiler.
@@ -219,7 +236,8 @@ def compile(
219236 # Save the compiled artifact to disk in the specified path
220237 assert key is not None
221238 path = os .path .join (self .cache_dir , key )
222- if not envs .VLLM_DISABLE_COMPILE_CACHE :
239+
240+ if is_compile_cache_enabled (compiler_config ):
223241 compiled_graph .save (path = path , format = "unpacked" )
224242 compilation_counter .num_compiled_artifacts_saved += 1
225243 return compiled_graph , (key , path )
@@ -469,10 +487,8 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
469487 config_patches = current_config ,
470488 )
471489
472- # We treat VLLM_DISABLE_COMPILE_CACHE as the overall switch for torch
473- # compilation cache. So turn off the checks if we disable the
474- # compilation cache.
475- if not envs .VLLM_DISABLE_COMPILE_CACHE :
490+ # Turn off the checks if we disable the compilation cache.
491+ if is_compile_cache_enabled (compiler_config ):
476492 if hash_str is None :
477493 raise RuntimeError (
478494 "vLLM failed to compile the model. The most "
0 commit comments