1 parent d2740fa commit a2d5ef0
vllm/v1/attention/backends/mla/common.py
@@ -558,6 +558,19 @@ def __init__(
         self.dcp_world_size = 1
         self.dcp_rank = 0
 
+        if (
+            self.dcp_world_size > 1
+            and self.__class__.reorder_batch_threshold > 1
+            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
+        ):
+            logger.warning_once(
+                "DCP is enabled but FlashAttnMLA is not used. "
+                "Setting query_len_support back to SINGLE_ONLY "
+                "and reorder_batch_threshold back to 1."
+            )
+            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
+            self.__class__.reorder_batch_threshold = 1
+
         # Don't try to access the runner on AMD
         if self.aot_schedule:
             self.page_size = self.kv_cache_spec.block_size
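The hunk adds a guard to the MLA metadata builder's `__init__`: when decode context parallel (DCP) is active and the builder is not the FlashAttnMLA one, it downgrades the class-level `query_len_support` and `reorder_batch_threshold` so only single-token decode queries are reordered. Below is a minimal, self-contained sketch of that class-attribute fallback pattern, not the vLLM source itself; `QueryLenSupport`, the builder class, the default values, and the plain `logger.warning` call are stand-ins assumed for illustration.

```python
import enum
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class QueryLenSupport(enum.Enum):
    SINGLE_ONLY = "single_only"
    VARLEN = "varlen"


class MLACommonMetadataBuilder:
    # Class-level defaults; a subclass named FlashAttnMLAMetadataBuilder
    # (hypothetical here) would be exempt from the fallback below.
    query_len_support = QueryLenSupport.VARLEN
    reorder_batch_threshold = 16

    def __init__(self, dcp_world_size: int) -> None:
        self.dcp_world_size = dcp_world_size
        self.dcp_rank = 0

        # Mirror of the guard added in the diff: with DCP enabled and a
        # non-FlashAttnMLA builder, rewrite the class attributes so the
        # batch reordering logic only sees single-token queries.
        if (
            self.dcp_world_size > 1
            and self.__class__.reorder_batch_threshold > 1
            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
        ):
            logger.warning(
                "DCP is enabled but FlashAttnMLA is not used. "
                "Setting query_len_support back to SINGLE_ONLY "
                "and reorder_batch_threshold back to 1."
            )
            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
            self.__class__.reorder_batch_threshold = 1


if __name__ == "__main__":
    builder = MLACommonMetadataBuilder(dcp_world_size=2)
    print(builder.query_len_support, builder.reorder_batch_threshold)
    # QueryLenSupport.SINGLE_ONLY 1
```

Note that the fallback mutates class attributes rather than instance attributes, so once one builder instance trips the guard, every later instance of that builder class sees the downgraded settings as well.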