Skip to content

Commit becdd3d

Browse files
committed
Filter Indexer layers, fix metadata selection for DeepSeek Sparse MLA
Signed-off-by: xiaohajiayou <923390377@qq.com>
1 parent 6eacbb5 commit becdd3d

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

vllm/v1/spec_decode/eagle.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ def __init__(
109 109
else []
110 110
)
111 111

112+
self.use_cuda_graph = self.use_cuda_graph and bool(self.cudagraph_batch_sizes)
112 113
# persistent buffers for cuda graph
113 114
self.input_ids = torch.zeros(
114 115
self.max_num_tokens, dtype=torch.int32, device=device
@@ -939,7 +940,7 @@ def load_model(self, target_model: nn.Module) -> None:
939 940
self.vllm_config, DeepseekV32IndexerCache
940 941
)
941 942
draft_indexer_layer_names = indexer_layers.keys() - target_indexer_layer_names
942-
self.attn_layer_names = list(draft_attn_layer_names)
943+
self.attn_layer_names = list(draft_attn_layer_names - draft_indexer_layer_names)
943 944
self.indexer_layer_names = list(draft_indexer_layer_names)
944 945

945 946
if self.indexer_layer_names:
@@ -1051,9 +1052,7 @@ def dummy_run(
1051 1052
use_cudagraphs=True,
1052 1053
) -> None:
1053 1054
# Determine if CUDA graphs should be used for this run.
1054-
cudagraphs_enabled = (
1055-
use_cudagraphs and self.use_cuda_graph and bool(self.cudagraph_batch_sizes)
1056-
)
1055+
cudagraphs_enabled = use_cudagraphs and self.use_cuda_graph
1057 1056
if cudagraphs_enabled and num_tokens <= self.cudagraph_batch_sizes[-1]:
1058 1057
num_tokens = self.vllm_config.pad_for_cudagraph(num_tokens)
1059 1058

0 commit comments

Comments
 (0)