
Commit a2d5ef0

bugfix: set reorder_batch_threshold back to 1 when using FlashMLA and enabling DCP
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
1 parent: d2740fa

File tree

1 file changed: +13, -0 lines


vllm/v1/attention/backends/mla/common.py

Lines changed: 13 additions & 0 deletions
@@ -558,6 +558,19 @@ def __init__(
         self.dcp_world_size = 1
         self.dcp_rank = 0
 
+        if (
+            self.dcp_world_size > 1
+            and self.__class__.reorder_batch_threshold > 1
+            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
+        ):
+            logger.warning_once(
+                "DCP is enabled but not FlashAttnMLA is used. "
+                "Set query_len_support back to SINGLE_ONLY "
+                "and reorder_batch_threshold back to 1."
+            )
+            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
+            self.__class__.reorder_batch_threshold = 1
+
         # Don't try to access the runner on AMD
         if self.aot_schedule:
             self.page_size = self.kv_cache_spec.block_size
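
For context, the guard added above relies on query_len_support and reorder_batch_threshold being class attributes, so the reset downgrades the whole builder class rather than only the instance that detected the DCP mismatch. Below is a minimal, self-contained sketch of that fallback pattern; the class name MLAMetadataBuilderSketch, the default values, and the omission of the warning log are illustrative assumptions, not vLLM's actual code.

from enum import Enum


class QueryLenSupport(Enum):
    SINGLE_ONLY = "single_only"
    VARLEN = "varlen"


class MLAMetadataBuilderSketch:
    # Class-level capability flags, mirroring how the builders in the diff
    # expose query_len_support / reorder_batch_threshold as class attributes.
    query_len_support = QueryLenSupport.VARLEN
    reorder_batch_threshold = 128  # hypothetical default, for illustration only

    def __init__(self, dcp_world_size: int) -> None:
        self.dcp_world_size = dcp_world_size
        # Fallback: DCP is active, multi-token reordering is advertised, and
        # this is not the FlashAttnMLA builder -> downgrade the class to the
        # conservative single-query configuration.
        if (
            self.dcp_world_size > 1
            and self.__class__.reorder_batch_threshold > 1
            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilderSketch"
        ):
            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
            self.__class__.reorder_batch_threshold = 1


# With a DCP world size > 1 the fallback fires and rewrites the class
# attributes, so any later instance also sees the conservative settings.
builder = MLAMetadataBuilderSketch(dcp_world_size=2)
assert MLAMetadataBuilderSketch.reorder_batch_threshold == 1
assert MLAMetadataBuilderSketch.query_len_support is QueryLenSupport.SINGLE_ONLY

The assertions at the end illustrate the design choice in the commit: once any builder instance detects DCP without FlashAttnMLA, the conservative settings apply class-wide.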
