We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 240b2c1 commit 4b2d2b9 — Copy full SHA for 4b2d2b9
ggml/src/ggml-cuda/topk-moe.cu
@@ -74,7 +74,7 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float *
74
float wt_sum = 0.f;
75
76
extern __shared__ float data_topk_shared[];
77
- float * wt_shared_ptr = data_topk_shared + row * n_expert_used;
+ float * wt_shared_ptr = data_topk_shared + threadIdx.y * n_expert_used;
78
79
for (int k = 0; k < n_expert_used; k++) {
80
float max_val = wt[0];
0 commit comments