Skip to content

Commit 32fc471

Browse files
heheda12345 and NickLucche
authored and committed
fix unify kv cache spec
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
1 parent f0066bf commit 32fc471

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

vllm/v1/core/kv_cache_utils.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1103,7 +1103,9 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
11031103
kv_cache_spec: The kv cache spec of each attention layer in the model
11041104
"""
11051105

1106-
if is_kv_cache_spec_uniform(kv_cache_spec):
1106+
if is_kv_cache_spec_uniform(
1107+
kv_cache_spec) or UniformTypeKVCacheSpecs.is_uniform_type(
1108+
kv_cache_spec):
11071109
return
11081110

11091111
logger.warning(
@@ -1141,7 +1143,8 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
11411143
attention_chunk_size=spec.attention_chunk_size,
11421144
)
11431145

1144-
if not is_kv_cache_spec_uniform(kv_cache_spec):
1146+
if not (is_kv_cache_spec_uniform(kv_cache_spec)
1147+
or UniformTypeKVCacheSpecs.is_uniform_type(kv_cache_spec)):
11451148
raise ValueError("Hybrid KV cache manager is disabled but failed to "
11461149
"convert the KV cache specs to one unified type.")
11471150

0 commit comments

Comments (0)