@@ -61,7 +61,8 @@ def __init__(
6161 self .hidden_size = self .draft_model_config .get_hidden_size ()
6262
6363 self .attn_metadata_builder : Optional [AttentionMetadataBuilder ] = None
64- self .draft_indexer_metadata_builder : Optional [AttentionMetadataBuilder ] = None
64+ self .draft_indexer_metadata_builder : Optional [
65+ AttentionMetadataBuilder ] = None
6566 self .attn_layer_names : list [str ] = []
6667 self .indexer_layer_names : list [str ] = []
6768
@@ -181,8 +182,8 @@ def dummy_run(self,
181182 aclgraph_runtime_mode = aclgraph_runtime_mode ,
182183 batch_descriptor = batch_descriptor ):
183184 self .model (input_ids = input_ids ,
184- positions = positions ,
185- hidden_states = previous_hidden_states )
185+ positions = positions ,
186+ hidden_states = previous_hidden_states )
186187 if with_prefill :
187188 break
188189
@@ -385,7 +386,8 @@ def _propose(
385386 last_token_indices : Optional [torch .Tensor ],
386387 common_attn_metadata : CommonAttentionMetadata ,
387388 sampling_metadata : SamplingMetadata ,
388- mm_embed_inputs : Optional [tuple [list [torch .Tensor ], torch .Tensor ]] = None ,
389+ mm_embed_inputs : Optional [tuple [list [torch .Tensor ],
390+ torch .Tensor ]] = None ,
389391 ) -> torch .Tensor :
390392 num_tokens = target_token_ids .shape [0 ]
391393 batch_size = next_token_ids .shape [0 ]
@@ -440,7 +442,7 @@ def _propose(
440442 self .runner .aclgraph_dispatcher .dispatch (batch_descriptor )
441443 if aclgraph_runtime_mode not in [
442444 CUDAGraphMode .PIECEWISE , CUDAGraphMode .NONE
443- ] :
445+ ]:
444446 # Fallback to piecewise graph, when acl full graph is enabled
445447 logger .debug (
446448 "Currently the eagle proposer only supports cudagraph_mode "
0 commit comments