From 095fd649ad1aba7df30cf371e9abada63cc34893 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Tue, 30 Sep 2025 22:20:46 +0800
Subject: [PATCH 1/2] fix flashmla kernel selection

Signed-off-by: youkaichao
---
 vllm/attention/ops/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py
index 3cc0e4adfa0a..22bcf85f462e 100644
--- a/vllm/attention/ops/flashmla.py
+++ b/vllm/attention/ops/flashmla.py
@@ -136,7 +136,7 @@ def flash_mla_with_kvcache(
         descale_k is None
     ), "descale_q and descale_k should be both None or both not None"
 
-    if (descale_q is not None) and (descale_k is not None):
+    if indices is None:
         out, softmax_lse = torch.ops._flashmla_extension_C.fwd_kvcache_mla_fp8(
             q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale,
             causal, tile_scheduler_metadata, num_splits, descale_q, descale_k)

From cc1839f52647a068164775c712358016a3e9f0a6 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Tue, 30 Sep 2025 22:35:44 +0800
Subject: [PATCH 2/2] fix flashmla kernel selection

Signed-off-by: youkaichao
---
 vllm/attention/ops/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py
index 22bcf85f462e..9654f9f6775a 100644
--- a/vllm/attention/ops/flashmla.py
+++ b/vllm/attention/ops/flashmla.py
@@ -136,7 +136,7 @@ def flash_mla_with_kvcache(
         descale_k is None
     ), "descale_q and descale_k should be both None or both not None"
 
-    if indices is None:
+    if indices is None and q.element_size() == 1:
         out, softmax_lse = torch.ops._flashmla_extension_C.fwd_kvcache_mla_fp8(
             q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale,
             causal, tile_scheduler_metadata, num_splits, descale_q, descale_k)