
Commit a2d5ef0

bugfix: set reorder_batch_threshold back to 1 when using FlashMLA and enabling DCP
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
1 parent: d2740fa

File tree

1 file changed: +13, -0 lines


vllm/v1/attention/backends/mla/common.py

Lines changed: 13 additions & 0 deletions
@@ -558,6 +558,19 @@ def __init__(
         self.dcp_world_size = 1
         self.dcp_rank = 0
 
+        if (
+            self.dcp_world_size > 1
+            and self.__class__.reorder_batch_threshold > 1
+            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
+        ):
+            logger.warning_once(
+                "DCP is enabled but not FlashAttnMLA is used. "
+                "Set query_len_support back to SINGLE_ONLY "
+                "and reorder_batch_threshold back to 1."
+            )
+            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
+            self.__class__.reorder_batch_threshold = 1
+
         # Don't try to access the runner on AMD
         if self.aot_schedule:
             self.page_size = self.kv_cache_spec.block_size
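
For context, the guard added above relies on query_len_support and reorder_batch_threshold being class attributes, so the reset downgrades the whole builder class rather than only the instance that detected the DCP mismatch. Below is a minimal, self-contained sketch of that fallback pattern; the class name MLAMetadataBuilderSketch, the default values, and the omission of the warning log are illustrative assumptions, not vLLM's actual code.

from enum import Enum


class QueryLenSupport(Enum):
    SINGLE_ONLY = "single_only"
    VARLEN = "varlen"


class MLAMetadataBuilderSketch:
    # Class-level capability flags, mirroring how the builders in the diff
    # expose query_len_support / reorder_batch_threshold as class attributes.
    query_len_support = QueryLenSupport.VARLEN
    reorder_batch_threshold = 128  # hypothetical default, for illustration only

    def __init__(self, dcp_world_size: int) -> None:
        self.dcp_world_size = dcp_world_size
        # Fallback: DCP is active, multi-token reordering is advertised, and
        # this is not the FlashAttnMLA builder -> downgrade the class to the
        # conservative single-query configuration.
        if (
            self.dcp_world_size > 1
            and self.__class__.reorder_batch_threshold > 1
            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilderSketch"
        ):
            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
            self.__class__.reorder_batch_threshold = 1


# With a DCP world size > 1 the fallback fires and rewrites the class
# attributes, so any later instance also sees the conservative settings.
builder = MLAMetadataBuilderSketch(dcp_world_size=2)
assert MLAMetadataBuilderSketch.reorder_batch_threshold == 1
assert MLAMetadataBuilderSketch.query_len_support is QueryLenSupport.SINGLE_ONLY

The assertions at the end illustrate the design choice in the commit: once any builder instance detects DCP without FlashAttnMLA, the conservative settings apply class-wide.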
