From 1f26a5150c5e48382bb2c0ee2adcb0b6cf2a1867 Mon Sep 17 00:00:00 2001
From: zhewenli
Date: Sat, 26 Apr 2025 16:34:51 -0700
Subject: [PATCH] fix missing parameter full_scales

---
 vllm/attention/ops/triton_flash_attention.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 745818eb6cff..b8fea060ab8f 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -711,6 +711,7 @@ def forward(
         causal=False,
         sm_scale=1.0,
         bias=None,
+        full_scales=None,
     ):
         if o is None:
             o = torch.empty_like(q, dtype=v.dtype)
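
Note: the hunk above adds a full_scales=None keyword to the forward() signature. As a minimal standalone sketch of the failure mode being fixed (this is not the vllm code; the function bodies and arguments below are hypothetical stand-ins), a caller that passes full_scales to a signature lacking that parameter raises a TypeError, while the patched signature accepts it:

    # Hypothetical illustration only; mirrors the parameter names in the patch.
    def forward_before(q, causal=False, sm_scale=1.0, bias=None):
        return q  # placeholder body

    def forward_after(q, causal=False, sm_scale=1.0, bias=None, full_scales=None):
        return q  # placeholder body

    try:
        forward_before([1.0], full_scales=(1.0, 1.0))
    except TypeError as exc:
        print("before the patch:", exc)  # unexpected keyword argument 'full_scales'

    forward_after([1.0], full_scales=(1.0, 1.0))  # accepted after the patch
    print("after the patch: call succeeds")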