From 9dae94aa4a0e839ba98011a8879176048534760c Mon Sep 17 00:00:00 2001
From: zhangdepeng
Date: Fri, 29 Aug 2025 19:14:32 +0800
Subject: [PATCH] remove aicpu op

Signed-off-by: zhangdepeng
---
 vllm_ascend/torchair/torchair_attention.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/torchair/torchair_attention.py b/vllm_ascend/torchair/torchair_attention.py
index da754e544e..81f2968a8e 100644
--- a/vllm_ascend/torchair/torchair_attention.py
+++ b/vllm_ascend/torchair/torchair_attention.py
@@ -304,6 +304,7 @@ def __init__(
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
         self.key_cache = None
         self.value_cache = None
+        self.scale_tensor = torch.zeros((), device='npu', dtype=torch.int32)
 
     def forward(
         self,
@@ -366,7 +367,7 @@ def forward(
             key_cache, value_cache = kv_cache[0], kv_cache[1]
             slots = attn_metadata.slot_mapping
 
-            block_size = key_cache.shape[1]
+            block_size = self.scale_tensor + key_cache.shape[1]
             slots_indices = slots.reshape(-1, 1)
             block_indices = slots_indices // block_size
             slots_indices = slots_indices % block_size