From 954b5ed23fd1cab49ed7ec165a6eb465cf2619b2 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 16:26:58 +0000
Subject: [PATCH 1/4] fix: add stubs to allow compilation without sm100

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index 0319d1daf302..c439a0b3a201 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -23,6 +23,23 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
                                torch::Tensor const& kv_c_and_k_pe_cache,
                                torch::Tensor const& seq_lens,
                                torch::Tensor const& page_table, double scale);
+#else
+// define fallback stubs
+void cutlass_mla_decode_sm100a(torch::Tensor const& out,
+                               torch::Tensor const& q_nope,
+                               torch::Tensor const& q_pe,
+                               torch::Tensor const& kv_c_and_k_pe_cache,
+                               torch::Tensor const& seq_lens,
+                               torch::Tensor const& page_table, double scale) {
+  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
+}
+
+int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
+                                             int64_t num_batches,
+                                             int64_t sm_count,
+                                             int64_t num_kv_splits) {
+  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
+}
 #endif
 
 void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,

From 79573f593c47fc04be3ffc8c229b4741be6fc2fe Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 17:01:12 +0000
Subject: [PATCH 2/4] fix: oopsie

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index c439a0b3a201..051109387c49 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -24,20 +24,21 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
                                torch::Tensor const& seq_lens,
                                torch::Tensor const& page_table, double scale);
 #else
-// define fallback stubs
-void cutlass_mla_decode_sm100a(torch::Tensor const& out,
-                               torch::Tensor const& q_nope,
-                               torch::Tensor const& q_pe,
-                               torch::Tensor const& kv_c_and_k_pe_cache,
-                               torch::Tensor const& seq_lens,
-                               torch::Tensor const& page_table, double scale) {
+// fallback stubs
+void sm100_cutlass_mla_decode(
+    torch::Tensor const& out, torch::Tensor const& q_nope,
+    torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
+    torch::Tensor const& seq_lens, torch::Tensor const& page_table,
+    torch::Tensor const& workspace, double sm_scale,
+    int64_t num_kv_splits =
+        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 
-int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
-                                             int64_t num_batches,
-                                             int64_t sm_count,
-                                             int64_t num_kv_splits) {
+int64_t sm100_cutlass_mla_get_workspace_size(
+    int64_t max_seq_len, int64_t num_batches, int64_t sm_count = 0,
+    int64_t num_kv_splits =
+        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 #endif

From 8635659a09154bd044b6354fa0342e5ba5c71ba2 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 17:02:16 +0000
Subject: [PATCH 3/4] fix: style

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index 051109387c49..caadbc2e47c9 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -29,16 +29,14 @@ void sm100_cutlass_mla_decode(
     torch::Tensor const& out, torch::Tensor const& q_nope,
     torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
     torch::Tensor const& seq_lens, torch::Tensor const& page_table,
-    torch::Tensor const& workspace, double sm_scale,
-    int64_t num_kv_splits =
-        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
+    torch::Tensor const& workspace, double sm_scale, int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 
-int64_t sm100_cutlass_mla_get_workspace_size(
-    int64_t max_seq_len, int64_t num_batches, int64_t sm_count = 0,
-    int64_t num_kv_splits =
-        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
+int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
+                                             int64_t num_batches,
+                                             int64_t sm_count = 0,
+                                             int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 #endif

From f2fab0b19f149d29b93dab043c4afb020d557942 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Tue, 22 Jul 2025 16:31:32 +0000
Subject: [PATCH 4/4] fix: typo

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index caadbc2e47c9..2e5b3638600f 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -35,7 +35,7 @@ void sm100_cutlass_mla_decode(
 
 int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
                                              int64_t num_batches,
-                                             int64_t sm_count = 0,
+                                             int64_t sm_count,
                                              int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
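
Note (not part of any commit): the sketch below is the net state of the #else branch reconstructed from the four hunks above, shown only to summarize the fixup chain; the opening #if of the SM100 build guard is outside these hunks. TORCH_CHECK_NOT_IMPLEMENTED throws c10::NotImplementedError, so neither stub ever returns and the int64_t stub needs no return statement.

  #else
  // fallback stubs
  void sm100_cutlass_mla_decode(
      torch::Tensor const& out, torch::Tensor const& q_nope,
      torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
      torch::Tensor const& seq_lens, torch::Tensor const& page_table,
      torch::Tensor const& workspace, double sm_scale, int64_t num_kv_splits) {
    // Reached only in builds without the SM100 CUTLASS MLA kernels; the macro
    // throws, surfacing as NotImplementedError on the Python side.
    TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
  }

  int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
                                               int64_t num_batches,
                                               int64_t sm_count,
                                               int64_t num_kv_splits) {
    TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
  }
  #endif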