@@ -129,17 +129,17 @@ class AscendMetadata:
     # Number of tokens including padding (for logging).
     num_input_tokens: int = 0
     # Number of tokens excluding padding.
-    num_actual_tokens: int
+    num_actual_tokens: int = 0

     # The sequence length per sequence. Sequence length means the computed
     # tokens + new tokens (is None if it is a decoding).
     # (batch_size,)
-    seq_lens: torch.Tensor
+    seq_lens: torch.Tensor = None

-    query_lens: torch.Tensor
+    query_lens: torch.Tensor = None
     # Maximum query length in the batch (None for decoding).
     max_query_len: Optional[int] = None
-    query_start_loc: torch.Tensor
+    query_start_loc: torch.Tensor = None

     # TODO: Indicates whether there are only prefill requests.
     # FlashAttention can be used if there are only prefill requests (decode
@@ -150,7 +150,7 @@ class AscendMetadata:
     # ********************** KV Cache Related Properties ***********************
     # Block addresses per sequence (Seq id -> list of physical block).
     # (batch_size, max_blocks_per_seq)
-    block_tables: torch.Tensor
+    block_tables: torch.Tensor = None

     # The indices of the token slots that input tokens will be stored into.
     # E.g., if `slot_mapping` is [35, 2, 17] and the block size is 16, the
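The common thread of the diff is that every remaining tensor field gains a `= None` default. For a Python dataclass this is more than convenience: once any field carries a default (here `num_input_tokens: int = 0`), every field declared after it must also have one, or the class fails to define with a `TypeError` for a non-default argument following a default one. Defaults also let the metadata be constructed empty and populated later. A minimal sketch of the resulting behavior, assuming `AscendMetadata` is a plain `@dataclass` (only the fields visible in this diff are reproduced):

```python
from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class AscendMetadata:
    # Every field after the first defaulted one must itself have a
    # default, so the tensor fields fall back to None until filled in.
    num_input_tokens: int = 0
    num_actual_tokens: int = 0
    seq_lens: torch.Tensor = None
    query_lens: torch.Tensor = None
    max_query_len: Optional[int] = None
    query_start_loc: torch.Tensor = None
    block_tables: torch.Tensor = None


# Zero-argument construction now works; without the added defaults it
# would raise a TypeError for the missing non-default fields.
meta = AscendMetadata()
meta.seq_lens = torch.tensor([5, 3])
```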