@@ -129,17 +129,17 @@ class AscendMetadata:
     # Number of tokens including padding (for logging).
     num_input_tokens: int = 0
     # Number of tokens excluding padding.
-    num_actual_tokens: int
+    num_actual_tokens: int = 0

     # The sequence length per sequence. Sequence length means the computed
     # tokens + new tokens (is None if it is a decoding).
     # (batch_size,)
-    seq_lens: torch.Tensor
+    seq_lens: torch.Tensor = None

-    query_lens: torch.Tensor
+    query_lens: torch.Tensor = None
     # Maximum query length in the batch (None for decoding).
     max_query_len: Optional[int] = None
-    query_start_loc: torch.Tensor
+    query_start_loc: torch.Tensor = None

     # TODO: Indicates whether there are only prefill requests.
     # FlashAttention can be used if there are only prefill requests (decode
@@ -150,7 +150,7 @@ class AscendMetadata:
     # ********************** KV Cache Related Properties ***********************
     # Block addresses per sequence (Seq id -> list of physical block).
     # (batch_size, max_blocks_per_seq)
-    block_tables: torch.Tensor
+    block_tables: torch.Tensor = None

     # The indices of the token slots that input tokens will be stored into.
     # E.g., if `slot_mapping` is [35, 2, 17] and the block size is 16, the
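The common thread of the diff is that every remaining tensor field gains a `= None` default. For a Python dataclass this is more than convenience: once any field carries a default (here `num_input_tokens: int = 0`), every field declared after it must also have one, or the class fails to define with a `TypeError` for a non-default argument following a default one. Defaults also let the metadata be constructed empty and populated later. A minimal sketch of the resulting behavior, assuming `AscendMetadata` is a plain `@dataclass` (only the fields visible in this diff are reproduced):

```python
from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class AscendMetadata:
    # Every field after the first defaulted one must itself have a
    # default, so the tensor fields fall back to None until filled in.
    num_input_tokens: int = 0
    num_actual_tokens: int = 0
    seq_lens: torch.Tensor = None
    query_lens: torch.Tensor = None
    max_query_len: Optional[int] = None
    query_start_loc: torch.Tensor = None
    block_tables: torch.Tensor = None


# Zero-argument construction now works; without the added defaults it
# would raise a TypeError for the missing non-default fields.
meta = AscendMetadata()
meta.seq_lens = torch.tensor([5, 3])
```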