We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8f45029 commit cedceb7 (Copy full SHA for cedceb7)
vllm/v1/attention/backends/mla/common.py
@@ -481,7 +481,7 @@ def __init__(self,
481
# which would result in up-projected context being
482
# 2*(192*128)*(64*1024) = 3gb
483
# (assuming 192 QK head dim, 128 heads, and fp16)
484
- 128 * 1024)
+ 64 * 1024)
485
assert self.chunked_prefill_workspace_size >= \
486
scheduler_config.max_num_seqs * cache_config.block_size
487
if self.dcp_world_size > 1:
0 commit comments