Skip to content

Commit c466324

Browse files
authored
[Bugfix][DP] Add with_prefill_across_dp to AscendMetadata to fix dp (#1094)
### What this PR does / why we need it? Add `with_prefill_across_dp` to `AscendMetadata` to fix DP. This PR fixes the bug introduced by #1012, which added an argument `with_prefill_across_dp` when dp_size > 1. Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent 0b12c2a commit c466324

File tree

1 file changed

+19
-11
lines changed

1 file changed

+19
-11
lines changed

vllm_ascend/attention/attention_v1.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ class AscendMetadata:
132132
# For logging.
133133
num_input_tokens: int = 0 # Number of tokens including padding.
134134

135+
with_prefill_across_dp: bool = False
136+
135137

136138
class AscendAttentionMetadataBuilder:
137139

@@ -142,8 +144,12 @@ def reorder_batch(self, input_batch: "InputBatch",
142144
scheduler_output: "SchedulerOutput") -> bool:
143145
return False
144146

145-
def build(self, num_reqs, num_actual_tokens, max_query_len,
146-
common_prefix_len):
147+
def build(self,
148+
num_reqs,
149+
num_actual_tokens,
150+
max_query_len,
151+
common_prefix_len,
152+
with_prefill_across_dp: bool = False):
147153

148154
block_table = self.runner.input_batch.block_table[0].get_device_tensor(
149155
)
@@ -160,15 +166,17 @@ def build(self, num_reqs, num_actual_tokens, max_query_len,
160166
query_start_loc = query_start_loc_cpu.to(self.runner.device,
161167
non_blocking=True)
162168

163-
attn_metadata = AscendMetadata(num_actual_tokens=num_actual_tokens,
164-
block_tables=block_table,
165-
query_start_loc=query_start_loc,
166-
query_lens=query_lens,
167-
seq_lens=seq_lens,
168-
max_query_len=max_query_len,
169-
slot_mapping=slot_mapping,
170-
attn_mask=attn_mask,
171-
attn_state=attn_state)
169+
attn_metadata = AscendMetadata(
170+
num_actual_tokens=num_actual_tokens,
171+
block_tables=block_table,
172+
query_start_loc=query_start_loc,
173+
query_lens=query_lens,
174+
seq_lens=seq_lens,
175+
max_query_len=max_query_len,
176+
slot_mapping=slot_mapping,
177+
attn_mask=attn_mask,
178+
attn_state=attn_state,
179+
with_prefill_across_dp=with_prefill_across_dp)
172180
return attn_metadata
173181

174182

0 commit comments

Comments
 (0)