vllm-project · houseroad · Apr 16, 2025 · Apr 14, 2025
@@ -290,6 +290,19 @@ def _get_shape_env() -> AlwaysHitShapeEnv:
             # Dynamo metrics context, see method for more details.
             stack.enter_context(self.metrics_context())
 
+            # Disable remote caching. When the remote caches are enabled, a
+            # remote cache hit skips the monkey-patched functions entirely.
+            # vLLM currently assumes and requires that the monkey-patched
+            # functions actually get called.
+            # TODO(zou3519): we're going to replace this all with
+            # standalone_compile sometime.
+            if is_torch_equal_or_newer("2.6"):
+                stack.enter_context(
+                    torch._inductor.config.patch(fx_graph_remote_cache=False))
+                stack.enter_context(
+                    torch._functorch.config.patch(
+                        enable_remote_autograd_cache=False))
+
             compiled_graph = compile_fx(
                 graph,
                 example_inputs,