From 3b480c3e1757296bb2c95a129fc01f3c6c8aaaf1 Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Tue, 27 Oct 2020 08:15:16 -0400
Subject: [PATCH] Tokenizer and config

---
 src/transformers/configuration_deberta.py | 13 +++++++++----
 src/transformers/tokenization_deberta.py  | 15 ++++++++-------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/transformers/configuration_deberta.py b/src/transformers/configuration_deberta.py
index 6098d4375ea451..7088091e918b8c 100644
--- a/src/transformers/configuration_deberta.py
+++ b/src/transformers/configuration_deberta.py
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" DeBERTa model configuration """
+""" DeBERTa model configuration """

 from .configuration_utils import PretrainedConfig
 from .utils import logging
@@ -20,7 +20,7 @@

 logger = logging.get_logger(__name__)

-DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+DEBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     "microsoft/deberta-base": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-base/config.json",
     "microsoft/deberta-large": "https://s3.amazonaws.com/models.huggingface.co/bert/microsoft/deberta-large/config.json",
 }
@@ -28,8 +28,13 @@

 class DebertaConfig(PretrainedConfig):
     r"""
-    :class:`~transformers.DebertaConfig` is the configuration class to store the configuration of a
-    :class:`~transformers.DebertaModel`.
+    This is the configuration class to store the configuration of a :class:`~transformers.DebertaModel` or a
+    :class:`~transformers.TFDebertaModel`. It is used to instantiate a DeBERTa model according to the specified
+    arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar
+    configuration to that of the DeBERTa `microsoft/deberta-base <https://huggingface.co/microsoft/deberta-base>`__ architecture.
+
+    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
+    outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.

     Arguments:
         vocab_size (:obj:`int`, `optional`, defaults to 30522):
diff --git a/src/transformers/tokenization_deberta.py b/src/transformers/tokenization_deberta.py
index d27331552b87d2..e59b34ea2f90cf 100644
--- a/src/transformers/tokenization_deberta.py
+++ b/src/transformers/tokenization_deberta.py
@@ -581,7 +581,7 @@ def convert_tokens_to_string(self, tokens):
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         """
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-        adding special tokens. A BERT sequence has the following format:
+        adding special tokens. A DeBERTa sequence has the following format:

         - single sequence: [CLS] X [SEP]
         - pair of sequences: [CLS] A [SEP] B [SEP]
@@ -608,14 +608,15 @@ def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_spe
         special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods.

         Args:
-            token_ids_0: list of ids (must not contain special tokens)
-            token_ids_1: Optional list of ids (must not contain special tokens), necessary when fetching sequence ids
-                for sequence pairs
-            already_has_special_tokens: (default False) Set to True if the token list is already formated with
-                special tokens for the model
+            token_ids_0 (:obj:`List[int]`):
+                List of IDs.
+            token_ids_1 (:obj:`List[int]`, `optional`):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not the token list is already formatted with special tokens for the model.

         Returns:
-            A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+            :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
         """
         if already_has_special_tokens:
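
Not part of the patch itself: a minimal sketch exercising the two APIs whose docstrings change above, assuming a transformers build that includes the DeBERTa port (v3.4.0 or later) and network access to download the microsoft/deberta-base checkpoint named in the archive map. The example sentences are arbitrary.

# Minimal sketch (assumptions stated above), not the library's test suite.
from transformers import DebertaConfig, DebertaTokenizer

# Default construction yields a configuration similar to microsoft/deberta-base.
config = DebertaConfig()

tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Hello world"))
ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("How are you?"))

# Pair of sequences is laid out as: [CLS] A [SEP] B [SEP]
inputs = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)

# Mask uses 1 for special tokens and 0 for sequence tokens, per the docstring.
mask = tokenizer.get_special_tokens_mask(ids_a, ids_b)
assert len(mask) == len(inputs)
assert mask[0] == 1 and mask[-1] == 1  # the [CLS] and final [SEP] positions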