We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0cd6239 · commit 2be22a5 (Copy full SHA for 2be22a5)
vllm/attention/ops/paged_attn.py
@@ -114,7 +114,8 @@ def forward_decode(
         use_custom = (custom_attn_available and query.dtype == torch.half
                       and head_size == 128 and block_size == 16
                       and kv_cache_dtype == "auto"
-                      and (gqa_ratio >= 1 and gqa_ratio <= 16))
+                      and (gqa_ratio >= 1 and gqa_ratio <= 16)
+                      and max_context_len <= 32768)
         if not use_custom:
             _PARTITION_SIZE = _PARTITION_SIZE_V1V2
         else:
0 commit comments