We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3d330c4 · commit 055915e — Copy full SHA for 055915e
vllm/config.py
@@ -4495,7 +4495,6 @@ def __post_init__(self):
4495
"full_cuda_graph is not supported with "
4496
"cascade attention. Disabling cascade attention.")
4497
self.model_config.disable_cascade_attn = True
4498
- self.cache_config.enable_prefix_caching = False
4499
4500
if (self.kv_events_config is not None
4501
and self.kv_events_config.enable_kv_cache_events
0 commit comments