diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 237802afccde..6483b72e4eb9 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -240,9 +240,7 @@ def forward( `vllm.forward_context.get_forward_context().attn_metadata`. """ if self.calculate_kv_scales: - attn_metadata = get_forward_context().attn_metadata - if attn_metadata.enable_kv_scales_calculation: - self.calc_kv_scales(query, key, value) + self.calc_kv_scales(query, key, value) if self.use_output: output_shape = (output_shape if output_shape is not None else query.shape)