We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 045b396 · commit b91d8db · Copy full SHA for b91d8db
vllm/config/vllm.py
@@ -350,6 +350,15 @@ def __post_init__(self):
350
or self.model_config.is_encoder_decoder
351
):
352
self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
353
+
354
+ # Decode context parallel does not support full CUDA graphs yet.
355
+ if self.parallel_config.decode_context_parallel_size > 1:
356
+ logger.warning(
357
+ "Decode context parallel (DCP) is enabled, which is "
358
+ "incompatible with full CUDA graphs. Set "
359
+ "cudagraph_mode to PIECEWISE."
360
+ )
361
+ self.compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
362
else:
363
self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
364
0 commit comments