Add set_env_var helper and add pattern matcher debug utility

ProExpertProg · ProExpertProg · commit bc5dfafa6658 · 2025-08-19T12:11:03.000-04:00
Signed-off-by: Luka Govedic <lgovedic@redhat.com>
diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py
@@ -1,11 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from contextlib import ExitStack
+
 from torch import fx as fx
 
+from vllm import envs
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
+from vllm.utils import set_env_var
 
 if current_platform.is_cuda_alike():
     from .fusion import FusionPass
@@ -43,13 +47,30 @@ def __init__(self):
         self.passes: list[VllmInductorPass] = []
 
     def __call__(self, graph: fx.Graph):
-        shape = get_pass_context().runtime_shape
-        for pass_ in self.passes:
-            if pass_.is_applicable_for_shape(shape):
-                pass_(graph)
-
-        # always run fix_functionalization last
-        self.fix_functionalization(graph)
+        """Run every applicable pass on ``graph``, then fix functionalization.
+
+        When ``VLLM_PATTERN_MATCH_DEBUG`` is set, its value is exported as
+        ``TORCHINDUCTOR_PATTERN_MATCH_DEBUG`` for the duration of the passes
+        and the previous value is restored afterwards.
+        """
+        # Read the setting once so the check and the exported value agree.
+        debug_target = envs.VLLM_PATTERN_MATCH_DEBUG
+
+        with ExitStack() as stack:
+            # ExitStack lets the override be entered only when requested.
+            # NOTE: a get_tensor_model_parallel_rank() == 0 guard is not applied.
+            if debug_target is not None:
+                stack.enter_context(
+                    set_env_var('TORCHINDUCTOR_PATTERN_MATCH_DEBUG',
+                                debug_target))
+
+            shape = get_pass_context().runtime_shape
+            for pass_ in self.passes:
+                if pass_.is_applicable_for_shape(shape):
+                    pass_(graph)
+
+            # always run fix_functionalization last
+            self.fix_functionalization(graph)
 
     def configure(self, config: VllmConfig):
         self.pass_config = config.compilation_config.pass_config
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -160,6 +160,9 @@
     VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8: bool = False
     VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False
     VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None
+    VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE: bool = True
+    VLLM_USE_STANDALONE_COMPILE: bool = True
+    VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None
 
 
 def get_default_cache_root():
@@ -363,6 +366,10 @@ def get_vllm_port() -> Optional[int]:
     "VLLM_USE_STANDALONE_COMPILE":
     lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
 
+    # Debug pattern matching inside custom passes
+    "VLLM_PATTERN_MATCH_DEBUG":
+    lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None),
+
     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
     "LOCAL_RANK":
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
@@ -3346,3 +3346,27 @@ def decorate_logs(process_name: Optional[str] = None) -> None:
     pid = os.getpid()
     _add_prefix(sys.stdout, process_name, pid)
     _add_prefix(sys.stderr, process_name, pid)
+
+
+@contextlib.contextmanager
+def set_env_var(key: str, value: str):
+    """Temporarily set environment variable ``key`` to ``value``.
+
+    On exit (normal or via exception) the previous value is restored, or
+    the variable is removed if it was not set before entering.
+
+    Args:
+        key: Name of the environment variable.
+        value: String value to assign while the context is active.
+    """
+    old = os.environ.get(key)
+    os.environ[key] = value
+    try:
+        yield
+    finally:
+        if old is None:
+            # The body may already have removed the key; a plain ``del``
+            # would raise KeyError here and mask the body's own exception.
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = old