Skip to content

Commit 4b2d2b9

Browse files
committed
Use smem for final write
1 parent 240b2c1 commit 4b2d2b9

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/topk-moe.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float *
74 74       float wt_sum = 0.f;
75 75
76 76       extern __shared__ float data_topk_shared[];
77    -     float * wt_shared_ptr = data_topk_shared + row * n_expert_used;
   77 +     float * wt_shared_ptr = data_topk_shared + threadIdx.y * n_expert_used;
78 78
79 79       for (int k = 0; k < n_expert_used; k++) {
80 80           float max_val = wt[0];

0 commit comments

Comments
 (0)