We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3d330c4 · commit 055915e — Copy full SHA for 055915e
vllm/config.py
@@ -4495,7 +4495,6 @@ def __post_init__(self):
4495
"full_cuda_graph is not supported with "
4496
"cascade attention. Disabling cascade attention.")
4497
self.model_config.disable_cascade_attn = True
4498
- self.cache_config.enable_prefix_caching = False
4499
4500
if (self.kv_events_config is not None
4501
and self.kv_events_config.enable_kv_cache_events
0 commit comments