Skip to content

Commit 2be22a5

Browse files
committed
update conditions of max-context-len
1 parent 0cd6239 commit 2be22a5

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

vllm/attention/ops/paged_attn.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ def forward_decode(
114 114
use_custom = (custom_attn_available and query.dtype == torch.half
115 115
and head_size == 128 and block_size == 16
116 116
and kv_cache_dtype == "auto"
117-
and (gqa_ratio >= 1 and gqa_ratio <= 16))
117+
and (gqa_ratio >= 1 and gqa_ratio <= 16)
118+
and max_context_len <= 32768)
118 119
if not use_custom:
119 120
_PARTITION_SIZE = _PARTITION_SIZE_V1V2
120 121
else:

0 commit comments

Comments (0)