From 2c1f02ba03fabcecbee2321e3fba954a3630121f Mon Sep 17 00:00:00 2001 From: wangzaijun Date: Wed, 25 Dec 2024 18:51:18 +0800 Subject: [PATCH] add A800 grouped moe kernel json configs. --- ...t16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json | 1 + ...t16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json | 1 + 2 files changed, 2 insertions(+) create mode 100644 lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json create mode 100644 lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json diff --git a/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json b/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json new file mode 100644 index 000000000..4c3327805 --- /dev/null +++ b/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=2048,N=1408,expert_num=64,mul_routed_weight=false,out_dtype=torch.bfloat16,topk_num=6,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json @@ -0,0 +1 @@ +{"1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "num_warps": 8, "num_stages": 3}, "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 8, "num_warps": 8, "num_stages": 3}, "64": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 8, "num_stages": 3}, "128": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 4}, "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, "8192": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}} \ No newline at end of file diff --git a/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json b/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json new file mode 100644 index 000000000..e85b290bc --- /dev/null +++ b/lightllm/common/all_kernel_configs/grouped_moe_gemm_kernel/{K=704,N=2048,expert_num=64,mul_routed_weight=true,out_dtype=torch.bfloat16,topk_num=1,use_fp8_w8a8=false}_NVIDIA_A800-SXM4-80GB.json @@ -0,0 +1 @@ +{"1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 5}, "8": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 5}, "64": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 3}, "128": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 3}, "256": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3}, "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, "1024": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, "8192": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8, "num_warps": 4, "num_stages": 3}} \ No newline at end of file