From 2886f7f08ac2cd9c0255e3b66d4ee95884b96087 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 13 Sep 2022 14:04:14 +0200 Subject: [PATCH] Fix tokenizer for XLMRobertaXL (#19004) Co-authored-by: ydshieh --- src/transformers/models/auto/tokenization_auto.py | 8 +++++++- .../models/xlm_roberta_xl/modeling_xlm_roberta_xl.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py index 86cc4eba94f3da..7aa7627cab2cb5 100644 --- a/src/transformers/models/auto/tokenization_auto.py +++ b/src/transformers/models/auto/tokenization_auto.py @@ -271,7 +271,13 @@ "XLMRobertaTokenizerFast" if is_tokenizers_available() else None, ), ), - ("xlm-roberta-xl", ("RobertaTokenizer", "RobertaTokenizerFast" if is_tokenizers_available() else None)), + ( + "xlm-roberta-xl", + ( + "XLMRobertaTokenizer" if is_sentencepiece_available() else None, + "XLMRobertaTokenizerFast" if is_tokenizers_available() else None, + ), + ), ( "xlnet", ( diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py index ca1c35bf650586..e0634d9a6ae668 100644 --- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py @@ -54,7 +54,7 @@ _CHECKPOINT_FOR_DOC = "xlm-roberta-xlarge" _CONFIG_FOR_DOC = "XLMRobertaXLConfig" -_TOKENIZER_FOR_DOC = "RobertaTokenizer" +_TOKENIZER_FOR_DOC = "XLMRobertaTokenizer" XLM_ROBERTA_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [ "facebook/xlm-roberta-xl",