Skip to content

Commit 1974880

Browse files
authored
[Bugfix] skip cuda graph for drafter when running with eager (#26821)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
1 parent 4a8a567 commit 1974880

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3482,7 +3482,10 @@ def _dummy_run(
3482 3482

3483 3483
if self.speculative_config and self.speculative_config.use_eagle():
3484 3484
assert isinstance(self.drafter, EagleProposer)
3485-
use_cudagraphs = cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
3485+
use_cudagraphs = (
3486+
cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
3487+
and not self.speculative_config.enforce_eager
3488+
)
3486 3489
self.drafter.dummy_run(num_tokens, use_cudagraphs=use_cudagraphs)
3487 3490

3488 3491
# This is necessary to avoid blocking DP.

0 commit comments

Comments
 (0)