We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 045b396 · commit b91d8db · Copy full SHA for b91d8db
vllm/config/vllm.py
@@ -350,6 +350,15 @@ def __post_init__(self):
350
or self.model_config.is_encoder_decoder
351
):
352
self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
353
+
354
+ # Decode context parallel does not support full CUDA graphs yet.
355
+ if self.parallel_config.decode_context_parallel_size > 1:
356
+ logger.warning(
357
+ "Decode context parallel (DCP) is enabled, which is "
358
+ "incompatible with full CUDA graphs. Set "
359
+ "cudagraph_mode to PIECEWISE."
360
+ )
361
+ self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
362
else:
363
self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
364
0 commit comments