[Core] Force PIECEWISE CUDAGraph mode for encoder-decoder (vllm-project#25701)

russellb · xuebwang-amd · commit dab72bf062cb · 2025-10-24T09:19:21.000Z
Signed-off-by: Russell Bryant <rbryant@redhat.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
@@ -364,9 +364,11 @@ def __post_init__(self):
                     self.compilation_config.cudagraph_mode = \
                         CUDAGraphMode.FULL_AND_PIECEWISE
 
-                    # pooling model does not support full cudagraphs
+                    # pooling models and encoder-decoder models
+                    # do not support full cudagraphs
                     if self.model_config is not None and \
-                        self.model_config.pooler_config is not None:
+                        (self.model_config.pooler_config is not None
+                         or self.model_config.is_encoder_decoder):
                         self.compilation_config.cudagraph_mode = \
                             CUDAGraphMode.PIECEWISE
                 else: