From 3f25baa7aa17e704d215ecbf90e1b28966d89a2f Mon Sep 17 00:00:00 2001
From: Jeremy Fowers
Date: Wed, 17 Jan 2024 15:31:24 -0500
Subject: [PATCH 1/2] Fix the documentation checkpoint for xlm-roberta-xl

---
 .../models/xlm_roberta_xl/modeling_xlm_roberta_xl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
index 582f3733d6e837..d4f8a29b15e959 100644
--- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
+++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
@@ -47,7 +47,7 @@

 logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "xlm-roberta-xlarge"
+_CHECKPOINT_FOR_DOC = "facebook/xlm-roberta-xl"
 _CONFIG_FOR_DOC = "XLMRobertaXLConfig"

 XLM_ROBERTA_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [

From 7b64da0cb7314174b99d625e9f17b8df9df7e5be Mon Sep 17 00:00:00 2001
From: Jeremy Fowers
Date: Wed, 17 Jan 2024 15:49:47 -0500
Subject: [PATCH 2/2] Improve docstring consistency

---
 .../xlm_roberta_xl/modeling_xlm_roberta_xl.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
index d4f8a29b15e959..48bb28bf4ee2c6 100644
--- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
+++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py
@@ -653,7 +653,7 @@ def _init_weights(self, module):


 @add_start_docstrings(
-    "The bare XLM-RoBERTa-xlarge Model transformer outputting raw hidden-states without any specific head on top.",
+    "The bare XLM-RoBERTa-XL Model transformer outputting raw hidden-states without any specific head on top.",
     XLM_ROBERTA_XL_START_DOCSTRING,
 )
 class XLMRobertaXLModel(XLMRobertaXLPreTrainedModel):
@@ -833,7 +833,7 @@ def forward(


 @add_start_docstrings(
-    """XLM-RoBERTa-xlarge Model with a `language modeling` head on top for CLM fine-tuning.""",
+    """XLM-RoBERTa-XL Model with a `language modeling` head on top for CLM fine-tuning.""",
     XLM_ROBERTA_XL_START_DOCSTRING,
 )
 class XLMRobertaXLForCausalLM(XLMRobertaXLPreTrainedModel):
@@ -990,7 +990,7 @@ def _reorder_cache(self, past_key_values, beam_idx):


 @add_start_docstrings(
-    """XLM-RoBERTa-xlarge Model with a `language modeling` head on top.""", XLM_ROBERTA_XL_START_DOCSTRING
+    """XLM-RoBERTa-XL Model with a `language modeling` head on top.""", XLM_ROBERTA_XL_START_DOCSTRING
 )
 class XLMRobertaXLForMaskedLM(XLMRobertaXLPreTrainedModel):
     _tied_weights_keys = ["lm_head.decoder.weight", "lm_head.decoder.bias"]
@@ -1081,7 +1081,7 @@ def forward(


 class XLMRobertaXLLMHead(nn.Module):
-    """XLM-Roberta-xlarge Head for masked language modeling."""
+    """XLM-RoBERTa-XL Head for masked language modeling."""

     def __init__(self, config):
         super().__init__()
@@ -1109,7 +1109,7 @@ def _tie_weights(self):

 @add_start_docstrings(
     """
-    XLM-RoBERTa-xlarge Model transformer with a sequence classification/regression head on top (a linear layer on top
+    XLM-RoBERTa-XL Model transformer with a sequence classification/regression head on top (a linear layer on top
     of the pooled output) e.g. for GLUE tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1203,7 +1203,7 @@ def forward(

 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a multiple choice classification head on top (a linear layer on top of the pooled
+    XLM-RoBERTa-XL Model with a multiple choice classification head on top (a linear layer on top of the pooled
     output and a softmax) e.g. for RocStories/SWAG tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1294,7 +1294,7 @@ def forward(

 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a token classification head on top (a linear layer on top of the hidden-states
+    XLM-RoBERTa-XL Model with a token classification head on top (a linear layer on top of the hidden-states
     output) e.g. for Named-Entity-Recognition (NER) tasks.
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,
@@ -1405,7 +1405,7 @@ def forward(self, features, **kwargs):

 @add_start_docstrings(
     """
-    XLM-Roberta-xlarge Model with a span classification head on top for extractive question-answering tasks like SQuAD
+    XLM-RoBERTa-XL Model with a span classification head on top for extractive question-answering tasks like SQuAD
     (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
     """,
     XLM_ROBERTA_XL_START_DOCSTRING,