diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py index f89bc2ccaa..536358fbc9 100644 --- a/python/sglang/srt/layers/attention/flashinfer_backend.py +++ b/python/sglang/srt/layers/attention/flashinfer_backend.py @@ -678,6 +678,7 @@ def call_begin_forward( self.num_qo_heads, self.num_kv_heads, self.head_dim, + q_data_type=self.q_data_type, ) # cached part @@ -691,6 +692,7 @@ def call_begin_forward( self.num_kv_heads, self.head_dim, 1, + q_data_type=self.q_data_type, )