
Commit 12c97dd

add default value

Signed-off-by: shen-shanshan <467638484@qq.com>

1 parent: fe239bf

1 file changed: 5 additions (+), 5 deletions (-)

vllm_ascend/attention/attention_v1.py (+5 −5)
@@ -129,17 +129,17 @@ class AscendMetadata:
     # Number of tokens including padding (for logging).
     num_input_tokens: int = 0
     # Number of tokens excluding padding.
-    num_actual_tokens: int
+    num_actual_tokens: int = 0

     # The sequence length per sequence. Sequence length means the computed
     # tokens + new tokens (is None if it is a decoding).
     # (batch_size,)
-    seq_lens: torch.Tensor
+    seq_lens: torch.Tensor = None

-    query_lens: torch.Tensor
+    query_lens: torch.Tensor = None
     # Maximum query length in the batch (None for decoding).
     max_query_len: Optional[int] = None
-    query_start_loc: torch.Tensor
+    query_start_loc: torch.Tensor = None

     # TODO: Indicates whether there are only prefill requests.
     # FlashAttention can be used if there are only prefill requests (decode
@@ -150,7 +150,7 @@ class AscendMetadata:
     # ********************** KV Cache Related Properties ***********************
     # Block addresses per sequence (Seq id -> list of physical block).
     # (batch_size, max_blocks_per_seq)
-    block_tables: torch.Tensor
+    block_tables: torch.Tensor = None

     # The indices of the token slots that input tokens will be stored into.
     # E.g., if `slot_mapping` is [35, 2, 17] and the block size is 16, the
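
Context for the change, as a hedged sketch: the annotation-plus-default field style suggests AscendMetadata is a Python dataclass, though the decorator itself is outside this hunk. If it is one, then every field declared after a field with a default must also have a default, so once `num_input_tokens: int = 0` precedes `num_actual_tokens`, the remaining fields need defaults for the class definition to be valid. Only the field names and defaults below come from the diff; everything else (imports, omitted fields, the decorator) is assumed for illustration.

    from dataclasses import dataclass
    from typing import Optional

    import torch


    @dataclass
    class AscendMetadata:
        # Field names and defaults mirror the diff; the real class
        # body is abridged here.
        num_input_tokens: int = 0             # already had a default
        num_actual_tokens: int = 0            # default added in this commit
        seq_lens: torch.Tensor = None         # Optional[torch.Tensor] would state
        query_lens: torch.Tensor = None       # the intent more precisely, but the
        max_query_len: Optional[int] = None   # sketch mirrors the committed code
        query_start_loc: torch.Tensor = None
        block_tables: torch.Tensor = None


    # With the defaults in place, a partially filled instance is legal:
    meta = AscendMetadata(num_actual_tokens=4)

    # Without them, Python would reject the class definition itself:
    # TypeError: non-default argument 'num_actual_tokens' follows
    # default argument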
