diff --git a/vllm_ascend/worker/mtp_proposer_v1.py b/vllm_ascend/worker/mtp_proposer_v1.py index ee8d7c5e30..0efde3bce0 100644 --- a/vllm_ascend/worker/mtp_proposer_v1.py +++ b/vllm_ascend/worker/mtp_proposer_v1.py @@ -218,7 +218,7 @@ def propose( self.hidden_states[:num_tokens] = target_hidden_states if attn_metadata.prefill is not None: - attn_metadata.prefill.query_lens = query_lens + attn_metadata.prefill.query_lens = query_lens.cpu() attn_metadata.prefill.input_positions = target_positions if not self.runner.torchair_graph_enabled: