Skip to content

Commit

Permalink
perplexity.cpp : better way to deal with spm prepending space
Browse files Browse the repository at this point in the history
  • Loading branch information
klosax authored Aug 26, 2023
1 parent d9a9b09 commit 724fa67
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions examples/perplexity/perplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
hs_data[i].context = prompt_lines[idx*6];
hs_data[i].gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
for (size_t j=0; j < 4; j++) {
hs_data[i].ending[j] = " " + prompt_lines[idx*6+2+j];
hs_data[i].ending[j] = prompt_lines[idx*6+2+j];
}

// Delete the selected random example from the prompt
Expand All @@ -415,7 +415,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {

// Do the 1st ending
// In this case we include the context when evaluating
auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[0], add_bos);
auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + " " + hs_data[task_idx].ending[0], add_bos);
auto query_size = query_embd.size();
//printf("First query: %d\n",(int)query_size);

Expand Down Expand Up @@ -462,11 +462,11 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
for (size_t ending_idx = 1; ending_idx < 4; ending_idx++) {

// Tokenize the query
// SPM tokenizer: Do not tokenize the starting space in the ending since it is always added by the tokenizer
// SPM tokenizer: Do not prepend a space since the tokenizer always does that
if (is_spm) {
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx].substr(1,hs_data[task_idx].ending[ending_idx].size()-1), false);
} else {
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
} else {
query_embd = ::llama_tokenize(ctx, " " + hs_data[task_idx].ending[ending_idx], false);
}

query_size = query_embd.size();
Expand Down

0 comments on commit 724fa67

Please sign in to comment.