diff --git a/docs/source/model_doc/luke.rst b/docs/source/model_doc/luke.rst index 95b50bf006e26a..b3190ea6532db9 100644 --- a/docs/source/model_doc/luke.rst +++ b/docs/source/model_doc/luke.rst @@ -1,5 +1,5 @@ .. - Copyright 2020 The HuggingFace Team. All rights reserved. + Copyright 2021 The HuggingFace Team. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at diff --git a/src/transformers/models/luke/__init__.py b/src/transformers/models/luke/__init__.py index 106a65e22c9f0a..4f5f3155581ab6 100644 --- a/src/transformers/models/luke/__init__.py +++ b/src/transformers/models/luke/__init__.py @@ -2,7 +2,7 @@ # There's no way to ignore "F401 '...' imported but unused" warnings in this # module, but to preserve other warnings. So, don't check this module at all. -# Copyright 2020 The HuggingFace Team. All rights reserved. +# Copyright 2021 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/transformers/models/luke/configuration_luke.py b/src/transformers/models/luke/configuration_luke.py index 6c434fc8cc00aa..1a8ab38ea28b91 100644 --- a/src/transformers/models/luke/configuration_luke.py +++ b/src/transformers/models/luke/configuration_luke.py @@ -21,8 +21,8 @@ logger = logging.get_logger(__name__) LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "luke-base": "https://huggingface.co/studio-ousia/luke-base/resolve/main/config.json", - "luke-large": "https://huggingface.co/studio-ousia/luke-large/resolve/main/config.json", + "studio-ousia/luke-base": "https://huggingface.co/studio-ousia/luke-base/resolve/main/config.json", + "studio-ousia/luke-large": "https://huggingface.co/studio-ousia/luke-large/resolve/main/config.json", } diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py index 66d94aa5253ae1..15b43b3a729b66 100644 --- a/src/transformers/models/luke/modeling_luke.py +++ b/src/transformers/models/luke/modeling_luke.py @@ -53,8 +53,6 @@ class BaseLukeModelOutputWithPooling(BaseModelOutputWithPooling): """ Base class for outputs of the LUKE model. - - Args: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. @@ -86,8 +84,6 @@ class BaseLukeModelOutput(BaseModelOutput): """ Base class for model's outputs, with potential hidden states and attentions. - - Args: last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`): Sequence of hidden-states at the output of the last layer of the model. @@ -119,8 +115,6 @@ class EntityClassificationOutput(ModelOutput): """ Outputs of entity classification models. - - Args: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. @@ -152,8 +146,6 @@ class EntityPairClassificationOutput(ModelOutput): """ Outputs of entity pair classification models. - - Args: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. @@ -185,8 +177,6 @@ class EntitySpanClassificationOutput(ModelOutput): """ Outputs of entity span classification models. 
- - Args: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. @@ -230,10 +220,6 @@ def __init__(self, config): self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) self.dropout = nn.Dropout(config.hidden_dropout_prob) - # position_ids (1, len position emb) is contiguous in memory and exported when serialized - self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1))) - self.position_embedding_type = getattr(config, "position_embedding_type", "absolute") - # End copy self.padding_idx = config.pad_token_id self.position_embeddings = nn.Embedding( @@ -336,8 +322,8 @@ def __init__(self, config): super().__init__() if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): raise ValueError( - "The hidden size (%d) is not a multiple of the number of attention " - "heads (%d)" % (config.hidden_size, config.num_attention_heads) + f"The hidden size {config.hidden_size} is not a multiple of the number of attention " + f"heads {config.num_attention_heads}." ) self.num_attention_heads = config.num_attention_heads @@ -495,9 +481,7 @@ def forward( else: entity_attention_output = attention_output[:, word_size:, :] - outputs = (word_attention_output, entity_attention_output) + self_outputs[ - 2: - ] # add attentions if we output them + outputs = (word_attention_output, entity_attention_output) + self_outputs[2:] # add attentions if we output them return outputs @@ -721,8 +705,6 @@ def _init_weights(self, module: nn.Module): subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and behavior. - - Parameters: config (:class:`~transformers.LukeConfig`): Model configuration class with all the parameters of the model. Initializing with a config file does not load the weights associated with the model, only the @@ -731,8 +713,6 @@ def _init_weights(self, module: nn.Module): """ LUKE_INPUTS_DOCSTRING = r""" - - Args: input_ids (:obj:`torch.LongTensor` of shape :obj:`({0})`): Indices of input sequence tokens in the vocabulary. @@ -745,8 +725,6 @@ def _init_weights(self, module: nn.Module): attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`): Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: - - - 1 for tokens that are **not masked**, - 0 for tokens that are **masked**. @@ -755,8 +733,6 @@ def _init_weights(self, module: nn.Module): Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0, 1]``: - - - 0 corresponds to a `sentence A` token, - 1 corresponds to a `sentence B` token. @@ -777,8 +753,6 @@ def _init_weights(self, module: nn.Module): entity_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, entity_length)`, `optional`): Mask to avoid performing attention on padding entity token indices. Mask values selected in ``[0, 1]``: - - - 1 for entity tokens that are **not masked**, - 0 for entity tokens that are **masked**. @@ -786,8 +760,6 @@ def _init_weights(self, module: nn.Module): Segment token indices to indicate first and second portions of the entity token inputs. Indices are selected in ``[0, 1]``: - - - 0 corresponds to a `portion A` entity token, - 1 corresponds to a `portion B` entity token. 
@@ -803,8 +775,6 @@ def _init_weights(self, module: nn.Module): head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`): Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``: - - - 1 indicates the head is **not masked**, - 0 indicates the head is **masked**. @@ -877,7 +847,6 @@ def forward( Returns: - Examples:: >>> from transformers import LukeTokenizer, LukeModel @@ -1232,7 +1201,7 @@ def forward( return ((loss,) + output) if loss is not None else output return EntityPairClassificationOutput( - loss=loss if loss is not None else None, + loss=loss, logits=logits, hidden_states=outputs.hidden_states, entity_hidden_states=outputs.entity_hidden_states, @@ -1354,7 +1323,7 @@ def forward( return ((loss,) + output) if loss is not None else output return EntitySpanClassificationOutput( - loss=loss if loss is not None else None, + loss=loss, logits=logits, hidden_states=outputs.hidden_states, entity_hidden_states=outputs.entity_hidden_states, diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index 26e256b84a3a7b..eb8a085df2308b 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tokenization classes for LUKE.""" + import itertools import json import os @@ -95,10 +96,10 @@ class LukeTokenizer(RobertaTokenizer): max_mention_length (:obj:`int`, `optional`, defaults to 30): The maximum number of tokens inside an entity span. entity_token_1 (:obj:`str`, `optional`, defaults to :obj:`<ent>`): - The special token representing an entity span. This token is only used when `task` is set to + The special token representing an entity span. This token is only used when ``task`` is set to "entity_classification" or "entity_pair_classification". entity_token_2 (:obj:`str`, `optional`, defaults to :obj:`<ent2>`): - The special token representing an entity span. This token is only used when `task` is set to + The special token representing an entity span. This token is only used when ``task`` is set to "entity_pair_classification". """ @@ -130,9 +131,7 @@ def __init__( if isinstance(entity_token_2, str) else entity_token_2 ) - kwargs["additional_special_tokens"] = [entity_token_1, entity_token_2] + kwargs.get( - "additional_special_tokens", [] - ) + kwargs["additional_special_tokens"] = [entity_token_1, entity_token_2] + kwargs.get("additional_special_tokens", []) super().__init__( vocab_file=vocab_file, @@ -156,7 +155,7 @@ def __init__( elif task == "entity_pair_classification": self.max_entity_length = 2 else: - raise ValueError(f"Task {task} not supported") + raise ValueError(f"Task {task} not supported. Select task from ['entity_classification', 'entity_pair_classification', 'entity_span_classification'] only.") self.max_mention_length = max_mention_length @@ -332,9 +331,9 @@ def encode_plus( **kwargs ) -> BatchEncoding: """ - Tokenize and prepare for the model a sequence or a pair of sequences. .. warning:: This method is deprecated, - ``__call__`` should be used instead. - + Tokenize and prepare for the model a sequence or a pair of sequences. + + .. warning:: This method is deprecated, ``__call__`` should be used instead. 
Args: text (:obj:`str`): @@ -1401,7 +1400,7 @@ def _pad( return encoded_inputs def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]: - vocab_file, merge_file = super(LukeTokenizer, self).save_vocabulary(save_directory, filename_prefix) + vocab_file, merge_file = super().save_vocabulary(save_directory, filename_prefix) entity_vocab_file = os.path.join( save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["entity_vocab_file"] diff --git a/tests/test_tokenization_luke.py b/tests/test_tokenization_luke.py index 56695254739a69..1146cb4f3299bc 100644 --- a/tests/test_tokenization_luke.py +++ b/tests/test_tokenization_luke.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2020 The HuggingFace Team. All rights reserved. +# Copyright 2021 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
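For reference, a minimal usage sketch of the pieces this diff touches: the ``studio-ousia/luke-base`` checkpoint id that now matches the keys in ``LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP``, and the word/entity outputs described in the modeling docstrings. This is not part of the diff itself; the example sentence, the ``entity_spans`` argument, and ``return_tensors="pt"`` are assumptions about the tokenizer's call signature rather than something shown in the hunks above::

    >>> from transformers import LukeTokenizer, LukeModel

    >>> # The checkpoint id mirrors the renamed archive-map keys above.
    >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base")
    >>> model = LukeModel.from_pretrained("studio-ousia/luke-base")

    >>> text = "Beyoncé lives in Los Angeles."
    >>> entity_spans = [(0, 7)]  # character-based span covering "Beyoncé" (assumed convention, not shown in this diff)
    >>> encoding = tokenizer(text, entity_spans=entity_spans, return_tensors="pt")

    >>> outputs = model(**encoding)
    >>> word_hidden_states = outputs.last_hidden_state  # (batch_size, sequence_length, hidden_size)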
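And a sketch of the tokenizer ``task`` argument whose error message the tokenization hunk rewrites. The cap on entity spans comes from ``LukeTokenizer.__init__`` in the diff (one span for ``"entity_classification"``, two for ``"entity_pair_classification"``); reusing the base checkpoint id together with ``task`` is an assumption made for illustration::

    >>> from transformers import LukeTokenizer

    >>> # "entity_classification" caps the tokenizer at a single entity span per example.
    >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base", task="entity_classification")
    >>> tokenizer.max_entity_length
    1

    >>> # "entity_pair_classification" allows exactly two spans, marked with the <ent> and <ent2> special tokens.
    >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base", task="entity_pair_classification")
    >>> tokenizer.max_entity_length
    2

Passing any other string for ``task`` raises the ``ValueError`` shown in the hunk, which now spells out the supported task names instead of the bare "Task {task} not supported".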