yukavio
diff --git a/csrc/ops.h b/csrc/ops.h
Lines changed: 9 additions & 0 deletions
diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp
Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,15 @@ torch::Tensor awq_dequantize(
     int split_k_iters,
     int thx,
     int thy);
+
+torch::Tensor marlin_gemm(  // Marlin optimized quantized GEMM for GPTQ; registered with pybind as "marlin_gemm".
+  torch::Tensor &a,  // NOTE(review): presumably the input (activation) matrix — confirm against the kernel definition.
+  torch::Tensor &b_q_weight,  // NOTE(review): presumably the quantized weight matrix — confirm.
+  torch::Tensor &b_scales,  // NOTE(review): presumably the quantization scales for b_q_weight — confirm.
+  torch::Tensor &workspace,  // NOTE(review): presumably a scratch buffer used by the kernel — confirm.
+  int64_t size_m,  // NOTE(review): presumably the M dimension of the GEMM — confirm.
+  int64_t size_n,  // NOTE(review): presumably the N dimension of the GEMM — confirm.
+  int64_t size_k);  // NOTE(review): presumably the K (reduction) dimension of the GEMM — confirm.
 #endif
 
 void squeezellm_gemm(
 
@@ -52,6 +52,7 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
 #ifndef USE_ROCM
   ops.def("awq_gemm", &awq_gemm, "Quantized GEMM for AWQ");
   ops.def("awq_dequantize", &awq_dequantize, "Dequantization for AWQ");
+  ops.def("marlin_gemm", &marlin_gemm, "Marlin Optimized Quantized GEMM for GPTQ");
 #endif
   ops.def("gptq_gemm", &gptq_gemm, "Quantized GEMM for GPTQ");
   ops.def("gptq_shuffle", &gptq_shuffle, "Post processing for GPTQ");