1 file changed: +3 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@ def __init__(
109109            else  []
110110        )
111111
112+         self .use_cuda_graph  =  self .use_cuda_graph  and  bool (self .cudagraph_batch_sizes )
112113        # persistent buffers for cuda graph 
113114        self .input_ids  =  torch .zeros (
114115            self .max_num_tokens , dtype = torch .int32 , device = device 
@@ -939,7 +940,7 @@ def load_model(self, target_model: nn.Module) -> None:
939940            self .vllm_config , DeepseekV32IndexerCache 
940941        )
941942        draft_indexer_layer_names  =  indexer_layers .keys () -  target_indexer_layer_names 
942-         self .attn_layer_names  =  list (draft_attn_layer_names )
943+         self .attn_layer_names  =  list (draft_attn_layer_names   -   draft_indexer_layer_names )
943944        self .indexer_layer_names  =  list (draft_indexer_layer_names )
944945
945946        if  self .indexer_layer_names :
@@ -1051,9 +1052,7 @@ def dummy_run(
10511052        use_cudagraphs = True ,
10521053    ) ->  None :
10531054        # Determine if CUDA graphs should be used for this run. 
1054-         cudagraphs_enabled  =  (
1055-             use_cudagraphs  and  self .use_cuda_graph  and  bool (self .cudagraph_batch_sizes )
1056-         )
1055+         cudagraphs_enabled  =  use_cudagraphs  and  self .use_cuda_graph 
10571056        if  cudagraphs_enabled  and  num_tokens  <=  self .cudagraph_batch_sizes [- 1 ]:
10581057            num_tokens  =  self .vllm_config .pad_for_cudagraph (num_tokens )
10591058
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments