Skip to content

Commit 0ec69fb

Browse files
committed
add assert that max_clamp == inf
1 parent c5acf1b commit 0ec69fb

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

ggml/src/ggml-cuda/topk-moe.cu

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
2 | 2
#include "ggml.h"
3 | 3
#include "topk-moe.cuh"
4 | 4

5+
#include <cmath>
5 | 6
#include <initializer_list>
6 | 7

7 | 8
// Warp-local softmax used for both the pre-top-k logits and the post-top-k delayed path.
@@ -253,6 +254,8 @@ void ggml_cuda_op_topk_moe(ggml_backend_cuda_context & ctx,
253 | 254
if (with_norm) {
254 | 255
if (clamp) {
255 | 256
clamp_val = ggml_get_op_params_f32(clamp, 0);
257+
float max_val = ggml_get_op_params_f32(clamp, 1);
258+
GGML_ASSERT(max_val == INFINITY);
256 | 259
}
257 | 260
launch_topk_moe_cuda<true>(ctx, logits_d, weights_d, ids_d, n_rows, n_experts, n_expert_used, clamp_val);
258 | 261
} else {

0 commit comments

Comments
 (0)