Skip to content

Commit

Permalink
perplexity.cpp : fix hellaswag handling of prepended spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
klosax authored Aug 26, 2023
1 parent e4324cb commit 6459cab
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion examples/perplexity/perplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,13 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
for (size_t ending_idx = 1; ending_idx < 4; ending_idx++) {

// Tokenize the query
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
// SPM tokenizer: Do not tokenize the starting space in the ending since it is always added by the tokenizer
if (is_spm) {
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx].substr(1,hs_data[task_idx].ending[ending_idx].size()-1), false);
} else {
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
}

query_size = query_embd.size();

// Stop if query won't fit the ctx window
Expand Down Expand Up @@ -505,6 +511,14 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
// task_idx,ending_idx,whole_size,context_size, hs_data[task_idx].ending_logprob_count[ending_idx], hs_data[task_idx].ending_logprob[ending_idx] );
}

// TODO: Temporary check for NaNs until Falcon MMQ is solved
for (size_t j = 0; j < 4; j++) {
if (std::isnan(hs_data[task_idx].ending_logprob[j])) {
printf("NAN in task, %zu ending %zu\n",task_idx, j);
return;
}
}

// Find the ending with maximum logprob
size_t ending_logprob_max_idx = 0;
double ending_logprob_max_val = hs_data[task_idx].ending_logprob[0];
Expand Down

0 comments on commit 6459cab

Please sign in to comment.