From 1f26a5150c5e48382bb2c0ee2adcb0b6cf2a1867 Mon Sep 17 00:00:00 2001
From: zhewenli
Date: Sat, 26 Apr 2025 16:34:51 -0700
Subject: [PATCH] fix missing parameter full_scales

---
 vllm/attention/ops/triton_flash_attention.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 745818eb6cff..b8fea060ab8f 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -711,6 +711,7 @@ def forward(
         causal=False,
         sm_scale=1.0,
         bias=None,
+        full_scales=None,
     ):
         if o is None:
             o = torch.empty_like(q, dtype=v.dtype)
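
Note: the hunk above adds a full_scales=None keyword to the forward() signature. As a minimal standalone sketch of the failure mode being fixed (this is not the vllm code; the function bodies and arguments below are hypothetical stand-ins), a caller that passes full_scales to a signature lacking that parameter raises a TypeError, while the patched signature accepts it:

    # Hypothetical illustration only; mirrors the parameter names in the patch.
    def forward_before(q, causal=False, sm_scale=1.0, bias=None):
        return q  # placeholder body

    def forward_after(q, causal=False, sm_scale=1.0, bias=None, full_scales=None):
        return q  # placeholder body

    try:
        forward_before([1.0], full_scales=(1.0, 1.0))
    except TypeError as exc:
        print("before the patch:", exc)  # unexpected keyword argument 'full_scales'

    forward_after([1.0], full_scales=(1.0, 1.0))  # accepted after the patch
    print("after the patch: call succeeds")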