short refactor and addressing comments

WorldExplored · vnadathur · WorldExplored · commit 6dfdf905a6f4 · 2025-10-09T17:47:11.000-04:00
- Concatenated the traced-file list lazily for debug logging.
- refactored comments.
- updated ignored factors
- logged errors
- persists cache metadata only if the file doesn't already exist

Srreyansh Sethi <srreyansh.sethi@gmail.com>

Co-Authored-By: vnadathur <236933696+vnadathur@users.noreply.github.com>
Signed-off-by: WorldExplored <srreyansh.sethi@gmail.com>
diff --git a/tests/config/test_config_utils.py b/tests/config/test_config_utils.py
@@ -22,7 +22,7 @@ def _expected_path(p_str: str = ".") -> str:
     import pathlib
 
     p = pathlib.Path(p_str)
-    return str(p.expanduser().resolve())
+    return p.expanduser().resolve().as_posix()
 
 
 # Minimal dataclass to test get_hash_factors.
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import ast
+import logging
 import dataclasses
 import hashlib
 import json
@@ -531,9 +532,17 @@ def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
         config_hash = vllm_config.compute_hash()
         compiler_hash = self.compiler_manager.compute_hash(vllm_config)
         forward_code_files = list(sorted(self.compilation_config.traced_files))
+        class _LazyJoin:
+            def __init__(self, seq: list[str], sep: str = "\n"):
+                self.seq = seq
+                self.sep = sep
+
+            def __str__(self) -> str:
+                return self.sep.join(self.seq)
+
         logger.debug(
             "Traced files (to be considered for compilation cache):\n%s",
-            "\n".join(forward_code_files),
+            _LazyJoin(forward_code_files),
         )
         hash_content = []
         for filepath in forward_code_files:
@@ -558,7 +567,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
             # graph.
             factors = [env_hash, config_hash, code_hash, compiler_hash]
             # Use SHA-256 for cache key hashing to be consistent across
-            # compute_hash functions. Truncate for a short, stable dir name.
+            # compute_hash functions. Truncate for a short cache dir name.
             hash_key = hashlib.sha256(str(factors).encode()).hexdigest()[:10]
             cache_dir = os.path.join(
                 envs.VLLM_CACHE_ROOT, "torch_compile_cache", hash_key
@@ -600,27 +609,36 @@ def __call__(self, graph: fx.GraphModule, example_inputs) -> Callable:
 
         # Persist and log only hash-relevant factors together.
         try:
-            logger.debug(
-                "Compile env factors (raw):\n%s\nVllm config hash: %s",
-                pprint.pformat(env_factors, width=120),
-                config_hash,
-            )
-            meta_path = os.path.join(local_cache_dir, "cache_key_factors.json")
-            with open(meta_path, "w") as f:
-                json.dump(
-                    {
-                        "env": env_factors,  # raw factors used for env_hash
-                        "config_hash": config_hash,
-                        "code_hash": code_hash,
-                        "compiler_hash": compiler_hash,
-                    },
-                    f,
-                    indent=2,
-                    sort_keys=True,
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.debug(
+                    "Compile env factors (raw):\n%s\nVllm config hash: %s",
+                    pprint.pformat(env_factors, width=120),
+                    config_hash,
                 )
+            meta_path = os.path.join(local_cache_dir, "cache_key_factors.json")
+            if not os.path.exists(meta_path):
+                with open(meta_path, "w") as f:
+                    json.dump(
+                        {
+                            "env": env_factors,  # raw factors used for env_hash
+                            "config_hash": config_hash,
+                            "code_hash": code_hash,
+                            "compiler_hash": compiler_hash,
+                        },
+                        f,
+                        indent=2,
+                        sort_keys=True,
+                    )
         except Exception:
             # Best-effort only; metadata write failures are non-fatal.
-            pass
+            logger.warning(
+                (
+                    "Could not write compile cache metadata at %s; continuing without "
+                    "metadata. Compiled cache remains valid; diagnostics may be limited."
+                ),
+                local_cache_dir,
+                exc_info=True,
+            )
 
         # when dynamo calls the backend, it means the bytecode
         # transform and analysis are done
@@ -727,4 +745,4 @@ def copy_and_call(*args):
                 list_args[index] = static_tensor
             return self.split_gm(*list_args)
 
-        return copy_and_call
+        return copy_and_call
diff --git a/vllm/envs.py b/vllm/envs.py
@@ -1396,24 +1396,45 @@ def set_vllm_use_v1(use_v1: bool):
 
 def compile_factors() -> dict[str, object]:
     """
-    Return raw env factors for compile hashing using the legacy opt-out
-    strategy: include all known env vars except a minimal set that clearly
-    does not affect compiled graph structure or kernel routing.
+    Return the environment variables used to compute the compile cache key.
+    All environment variables known to vLLM are included, except those that
+    cannot affect graph structure, codegen, or kernel selection
+    (see ``ignored_factors`` below).
     """
 
     ignored_factors: set[str] = {
         "MAX_JOBS",
         "VLLM_RPC_BASE_PATH",
         "VLLM_USE_MODELSCOPE",
         "VLLM_RINGBUFFER_WARNING_INTERVAL",
+        "VLLM_DEBUG_DUMP_PATH",
+        "VLLM_PORT",
+        "VLLM_CACHE_ROOT",
         "LD_LIBRARY_PATH",
-        "VLLM_PATTERN_MATCH_DEBUG",
         "VLLM_SERVER_DEV_MODE",
         "VLLM_DP_MASTER_IP",
         "VLLM_DP_MASTER_PORT",
         "VLLM_RANDOMIZE_DP_DUMMY_INPUTS",
         "VLLM_CI_USE_S3",
         "VLLM_MODEL_REDIRECT_PATH",
+        "VLLM_HOST_IP",                    
+        "S3_ACCESS_KEY_ID", "S3_SECRET_ACCESS_KEY", "S3_ENDPOINT_URL",  
+        "VLLM_USAGE_STATS_SERVER", "VLLM_NO_USAGE_STATS", "VLLM_DO_NOT_TRACK", 
+        "VLLM_LOGGING_LEVEL", "VLLM_LOGGING_PREFIX",
+        "VLLM_LOGGING_STREAM", "VLLM_LOGGING_CONFIG_PATH",
+        "VLLM_LOG_STATS_INTERVAL",         
+        "VLLM_DEBUG_LOG_API_SERVER_RESPONSE",
+        "VLLM_TUNED_CONFIG_FOLDER",        
+        "VLLM_ENGINE_ITERATION_TIMEOUT_S", 
+        "VLLM_HTTP_TIMEOUT_KEEP_ALIVE",
+        "VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS",
+        "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", 
+        "VLLM_SLEEP_WHEN_IDLE",            
+        "VLLM_IMAGE_FETCH_TIMEOUT", "VLLM_VIDEO_FETCH_TIMEOUT",
+        "VLLM_AUDIO_FETCH_TIMEOUT", "VLLM_MEDIA_URL_ALLOW_REDIRECTS",
+        "VLLM_MEDIA_LOADING_THREAD_COUNT",
+        "VLLM_MAX_AUDIO_CLIP_FILESIZE_MB",
+        "VLLM_VIDEO_LOADER_BACKEND", 
     }
 
     from vllm.config.utils import normalize_value
@@ -1427,4 +1448,4 @@ def compile_factors() -> dict[str, object]:
 
         factors[factor] = normalize_value(raw)
 
-    return factors
+    return factors