Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions csrc/attention/mla/cutlass_mla_entry.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
torch::Tensor const& kv_c_and_k_pe_cache,
torch::Tensor const& seq_lens,
torch::Tensor const& page_table, double scale);
#else
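// Fallback stubs: compiled only when the #if branch above is not taken, so
// these entry points are still defined and fail with a NotImplemented error
// when called.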
/// Stub for the CUTLASS MLA decode entry point.
///
/// Every argument is ignored. The check below is given the literal `false`
/// as its condition, so it fails unconditionally on every call and reports
/// "No compiled cutlass MLA".
void sm100_cutlass_mla_decode(
    torch::Tensor const& out,
    torch::Tensor const& q_nope,
    torch::Tensor const& q_pe,
    torch::Tensor const& kv_c_and_k_pe_cache,
    torch::Tensor const& seq_lens,
    torch::Tensor const& page_table,
    torch::Tensor const& workspace,
    double sm_scale,
    int64_t num_kv_splits) {
  // Unconditional failure: this branch carries no real implementation.
  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
}

/// Stub for the CUTLASS MLA workspace-size query.
///
/// Every argument is ignored. The check below is given the literal `false`
/// as its condition, so it fails unconditionally on every call and reports
/// "No compiled cutlass MLA"; no size is ever computed or returned.
int64_t sm100_cutlass_mla_get_workspace_size(
    int64_t max_seq_len,
    int64_t num_batches,
    int64_t sm_count,
    int64_t num_kv_splits) {
  // Unconditional failure: control never reaches the end of this function,
  // which is why no return statement follows.
  TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA");
}
#endif

void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,
Expand Down