From 248ec9d854b25ab714543447ac4ff62795d4566e Mon Sep 17 00:00:00 2001 From: Zsolt Borbely Date: Wed, 18 Jun 2025 13:28:57 +0200 Subject: [PATCH] Prevent surplus tensor reshape Signed-off-by: Zsolt Borbely --- vllm/v1/attention/backends/triton_attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index ecb92bb1e416..4c5a1a755c1a 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -376,7 +376,7 @@ def forward( query.reshape( (num_tokens, num_heads * head_size)).contiguous(), layer._q_scale) - query = query.reshape((num_tokens, num_heads, head_size)) + query = query.reshape((num_tokens, num_heads, head_size)) use_local_attn = \ (self.use_irope and attn_metadata.local_attn_metadata is not None)