From 58022e41b83d9b14e5adcdb6d71426aff92cc793 Mon Sep 17 00:00:00 2001
From: Vijeth Moudgalya <33093576+vijethmoudgalya@users.noreply.github.com>
Date: Tue, 30 May 2023 20:23:40 +0530
Subject: [PATCH] #23388 Issue: Update RoBERTa configuration (#23863)

---
 src/transformers/models/roberta/configuration_roberta.py | 4 ++--
 .../configuration_roberta_prelayernorm.py                | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/roberta/configuration_roberta.py b/src/transformers/models/roberta/configuration_roberta.py
index 3025fe2833d..f82033f4588 100644
--- a/src/transformers/models/roberta/configuration_roberta.py
+++ b/src/transformers/models/roberta/configuration_roberta.py
@@ -46,7 +46,7 @@ class RobertaConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 30522):
+        vocab_size (`int`, *optional*, defaults to 50265):
             Vocabulary size of the RoBERTa model. Defines the number of different tokens that can be represented by
             the `inputs_ids` passed when calling [`RobertaModel`] or [`TFRobertaModel`].
         hidden_size (`int`, *optional*, defaults to 768):
@@ -105,7 +105,7 @@ class RobertaConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=30522,
+        vocab_size=50265,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
diff --git a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
index 49f92586c1b..fca6763f274 100644
--- a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
+++ b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
@@ -45,7 +45,7 @@ class RobertaPreLayerNormConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 30522):
+        vocab_size (`int`, *optional*, defaults to 50265):
             Vocabulary size of the RoBERTa-PreLayerNorm model. Defines the number of different tokens that can be
             represented by the `inputs_ids` passed when calling [`RobertaPreLayerNormModel`] or
             [`TFRobertaPreLayerNormModel`].
@@ -106,7 +106,7 @@ class RobertaPreLayerNormConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=30522,
+        vocab_size=50265,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
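
Note: a minimal sketch of the effect of this patch, assuming a transformers install that
includes it. RoBERTa checkpoints use a 50265-token BPE vocabulary, so a zero-argument
config should now match roberta-base rather than carrying the BERT-style 30522 default
that the docstrings and __init__ previously advertised.

    # Sketch only; assumes a transformers release containing this patch.
    from transformers import RobertaConfig, RobertaPreLayerNormConfig

    # Default configs should now report the RoBERTa BPE vocabulary size.
    print(RobertaConfig().vocab_size)              # expected: 50265
    print(RobertaPreLayerNormConfig().vocab_size)  # expected: 50265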