diff --git a/bert_score/utils.py b/bert_score/utils.py index 745a4ea..1df6ad0 100644 --- a/bert_score/utils.py +++ b/bert_score/utils.py @@ -104,9 +104,9 @@ def sent_encode(tokenizer, sent): return tokenizer.build_inputs_with_special_tokens([]) elif isinstance(tokenizer, GPT2Tokenizer): # for RoBERTa and GPT-2 - return tokenizer.encode(sent, add_special_tokens=True, add_prefix_space=True, max_length=tokenizer.max_len) + return tokenizer.encode(sent, add_special_tokens=True, add_prefix_space=True, max_length=tokenizer.max_len, truncation=True) else: - return tokenizer.encode(sent, add_special_tokens=True, max_length=tokenizer.max_len) + return tokenizer.encode(sent, add_special_tokens=True, max_length=tokenizer.max_len, truncation=True) def get_model(model_type, num_layers, all_layers=None):