Skip to content

Commit 7415f3f

Browse files
committed
llama : minor
ggml-ci
1 parent 4fc48b7 commit 7415f3f

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

src/llama-sampling.cpp

+15-5
Original file line numberDiff line numberDiff line change
@@ -1421,15 +1421,25 @@ static void llama_sampler_penalties_accept(struct llama_sampler * smpl, llama_to
14211421

14221422
// if the ring buffer is full, remove the oldest token
14231423
if (ctx->prev.size() >= (size_t) ctx->penalty_last_n) {
1424-
const auto pop = ctx->prev.front();
1424+
const auto old = ctx->prev.front();
14251425

1426-
ctx->token_count[pop]--;
1427-
if (ctx->token_count[pop] == 0) {
1428-
ctx->token_count.erase(pop);
1426+
ctx->token_count[old]--;
1427+
if (ctx->token_count[old] == 0) {
1428+
ctx->token_count.erase(old);
14291429
}
14301430
}
14311431

14321432
ctx->prev.push_back(token);
1433+
1434+
#if 0
1435+
// sanity check
1436+
std::unordered_map<llama_token, int> tmp;
1437+
for (int i = 0; i < std::min<int>(ctx->penalty_last_n, ctx->prev.size()); ++i) {
1438+
tmp[ctx->prev.rat(i)]++;
1439+
}
1440+
1441+
assert(ctx->token_count == tmp);
1442+
#endif
14331443
}
14341444

14351445
static void llama_sampler_penalties_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
@@ -1449,7 +1459,7 @@ static void llama_sampler_penalties_apply(struct llama_sampler * smpl, llama_tok
14491459

14501460
const int count = token_iter->second;
14511461

1452-
assert(count > 0);
1462+
assert(count > 0 && count <= ctx->penalty_last_n);
14531463

14541464
// The academic publication that described this technique only divided by the penalty, but that would cause tokens with negative logits to become more likely, which is obviously wrong.
14551465
// The common fix for this problem is to multiply by the penalty instead of dividing.

0 commit comments

Comments
 (0)