From 954b5ed23fd1cab49ed7ec165a6eb465cf2619b2 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 16:26:58 +0000
Subject: [PATCH 1/4] fix: add stubs to allow compilation without sm100

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index 0319d1daf302..c439a0b3a201 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -23,6 +23,23 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
                                torch::Tensor const& kv_c_and_k_pe_cache,
                                torch::Tensor const& seq_lens,
                                torch::Tensor const& page_table, double scale);
+#else
+// define fallback stubs
+void cutlass_mla_decode_sm100a(torch::Tensor const& out,
+                               torch::Tensor const& q_nope,
+                               torch::Tensor const& q_pe,
+                               torch::Tensor const& kv_c_and_k_pe_cache,
+                               torch::Tensor const& seq_lens,
+                               torch::Tensor const& page_table, double scale) {
+  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
+}
+
+int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
+                                             int64_t num_batches,
+                                             int64_t sm_count,
+                                             int64_t num_kv_splits) {
+  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
+}
 #endif
 
 void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,

From 79573f593c47fc04be3ffc8c229b4741be6fc2fe Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 17:01:12 +0000
Subject: [PATCH 2/4] fix: oopsie

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index c439a0b3a201..051109387c49 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -24,20 +24,21 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
                                torch::Tensor const& seq_lens,
                                torch::Tensor const& page_table, double scale);
 #else
-// define fallback stubs
-void cutlass_mla_decode_sm100a(torch::Tensor const& out,
-                               torch::Tensor const& q_nope,
-                               torch::Tensor const& q_pe,
-                               torch::Tensor const& kv_c_and_k_pe_cache,
-                               torch::Tensor const& seq_lens,
-                               torch::Tensor const& page_table, double scale) {
+// fallback stubs
+void sm100_cutlass_mla_decode(
+    torch::Tensor const& out, torch::Tensor const& q_nope,
+    torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
+    torch::Tensor const& seq_lens, torch::Tensor const& page_table,
+    torch::Tensor const& workspace, double sm_scale,
+    int64_t num_kv_splits =
+        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 
-int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
-                                             int64_t num_batches,
-                                             int64_t sm_count,
-                                             int64_t num_kv_splits) {
+int64_t sm100_cutlass_mla_get_workspace_size(
+    int64_t max_seq_len, int64_t num_batches, int64_t sm_count = 0,
+    int64_t num_kv_splits =
+        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 #endif

From 8635659a09154bd044b6354fa0342e5ba5c71ba2 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Fri, 18 Jul 2025 17:02:16 +0000
Subject: [PATCH 3/4] fix: style

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index 051109387c49..caadbc2e47c9 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -29,16 +29,14 @@ void sm100_cutlass_mla_decode(
     torch::Tensor const& out, torch::Tensor const& q_nope,
     torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
     torch::Tensor const& seq_lens, torch::Tensor const& page_table,
-    torch::Tensor const& workspace, double sm_scale,
-    int64_t num_kv_splits =
-        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
+    torch::Tensor const& workspace, double sm_scale, int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 
-int64_t sm100_cutlass_mla_get_workspace_size(
-    int64_t max_seq_len, int64_t num_batches, int64_t sm_count = 0,
-    int64_t num_kv_splits =
-        1 /* Set to 1 to avoid cuda_graph issue by default. */) {
+int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
+                                             int64_t num_batches,
+                                             int64_t sm_count = 0,
+                                             int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
 #endif

From f2fab0b19f149d29b93dab043c4afb020d557942 Mon Sep 17 00:00:00 2001
From: Mickael Seznec
Date: Tue, 22 Jul 2025 16:31:32 +0000
Subject: [PATCH 4/4] fix: typo

Signed-off-by: Mickael Seznec
---
 csrc/attention/mla/cutlass_mla_entry.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu
index caadbc2e47c9..2e5b3638600f 100644
--- a/csrc/attention/mla/cutlass_mla_entry.cu
+++ b/csrc/attention/mla/cutlass_mla_entry.cu
@@ -35,7 +35,7 @@ void sm100_cutlass_mla_decode(
 
 int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
                                              int64_t num_batches,
-                                             int64_t sm_count = 0,
+                                             int64_t sm_count,
                                              int64_t num_kv_splits) {
   TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
 }
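
Note (not part of any commit): the sketch below is the net state of the #else branch reconstructed from the four hunks above, shown only to summarize the fixup chain; the opening #if of the SM100 build guard is outside these hunks. TORCH_CHECK_NOT_IMPLEMENTED throws c10::NotImplementedError, so neither stub ever returns and the int64_t stub needs no return statement.

  #else
  // fallback stubs
  void sm100_cutlass_mla_decode(
      torch::Tensor const& out, torch::Tensor const& q_nope,
      torch::Tensor const& q_pe, torch::Tensor const& kv_c_and_k_pe_cache,
      torch::Tensor const& seq_lens, torch::Tensor const& page_table,
      torch::Tensor const& workspace, double sm_scale, int64_t num_kv_splits) {
    // Reached only in builds without the SM100 CUTLASS MLA kernels; the macro
    // throws, surfacing as NotImplementedError on the Python side.
    TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
  }

  int64_t sm100_cutlass_mla_get_workspace_size(int64_t max_seq_len,
                                               int64_t num_batches,
                                               int64_t sm_count,
                                               int64_t num_kv_splits) {
    TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
  }
  #endif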