Fixes CircleCI check_code_quality failure #19

Closed
wants to merge 28 commits
Commits (28)
dfaca8b
Rebase with master
NielsRogge Apr 13, 2021
80dc11a
Minor bug fix in docs
NielsRogge Apr 5, 2021
616a389
Copy files from adding_luke_v2 and improve docs
NielsRogge Apr 5, 2021
8f6197a
change the default value of use_entity_aware_attention to True
ikuyamada Apr 11, 2021
72d7b14
remove word_hidden_states
ikuyamada Apr 11, 2021
7c20802
fix head models
ikuyamada Apr 11, 2021
27e5156
fix tests
ikuyamada Apr 11, 2021
02c1819
fix the conversion script
ikuyamada Apr 11, 2021
41719f0
add integration tests for the pretrained large model
ikuyamada Apr 11, 2021
370348d
improve docstring
ikuyamada Apr 11, 2021
ff3340f
Improve docs, make style
NielsRogge Apr 11, 2021
1269928
fix _init_weights for pytorch 1.8
ikuyamada Apr 12, 2021
6b596c6
improve docs
ikuyamada Apr 12, 2021
fbcfffc
fix tokenizer to construct entity sequence with [MASK] entity when en…
ikuyamada Apr 13, 2021
10b5f83
Make fix-copies
NielsRogge Apr 13, 2021
79ea9d1
Make style & quality
NielsRogge Apr 13, 2021
e384619
Bug fixes
NielsRogge Apr 13, 2021
3d8dac5
Add LukeTokenizer to init
NielsRogge Apr 13, 2021
4f4fcfa
Address most comments by @patil-suraj and @LysandreJik
NielsRogge Apr 16, 2021
b980786
rename _compute_extended_attention_mask to get_extended_attention_mask
ikuyamada Apr 18, 2021
2b14387
add comments to LukeSelfAttention
ikuyamada Apr 18, 2021
26541f3
fix the documentation of the tokenizer
ikuyamada Apr 18, 2021
79e4650
address comments by @patil-suraj, @LysandreJik, and @sgugger
ikuyamada Apr 18, 2021
729dc61
improve docs
ikuyamada Apr 18, 2021
27d0beb
Merge pull request #18 from ikuyamada/adding_luke_v3
NielsRogge Apr 20, 2021
efaed02
Make style, quality and fix-copies
NielsRogge Apr 20, 2021
a13d9a7
Improve docs
NielsRogge Apr 20, 2021
fba826d
fixes formatting of configuration_luke.py and modeling_luke.py to pas…
Apr 23, 2021
3 changes: 3 additions & 0 deletions docs/source/index.rst
@@ -314,6 +314,8 @@ TensorFlow and/or Flax.
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Longformer | ✅ | ✅ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Luke | ✅ | ❌ | ✅ | ❌ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| M2M100 | ✅ | ❌ | ✅ | ❌ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| MPNet | ✅ | ✅ | ✅ | ✅ | ❌ |
@@ -468,6 +470,7 @@ TensorFlow and/or Flax.
model_doc/layoutlm
model_doc/led
model_doc/longformer
model_doc/luke
model_doc/lxmert
model_doc/marian
model_doc/m2m_100
124 changes: 124 additions & 0 deletions docs/source/model_doc/luke.rst
@@ -0,0 +1,124 @@
..
Copyright 2021 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.

LUKE
-----------------------------------------------------------------------------------------------------------------------

Overview
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The LUKE model was proposed in `LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention
<https://arxiv.org/abs/2010.01057>`_ by Ikuya Yamada, Akari Asai, Hiroyuki Shindo, Hideaki Takeda and Yuji Matsumoto.
It is based on RoBERTa and adds entity embeddings as well as an entity-aware self-attention mechanism, which helps
improve performance on several downstream tasks that involve reasoning about entities, such as named-entity recognition,
extractive and cloze-style question answering, entity linking, and relation classification between entities.

The abstract from the paper is the following:

*Entity representations are useful in natural language tasks involving entities. In this paper, we propose new
pretrained contextualized representations of words and entities based on the bidirectional transformer. The proposed
model treats words and entities in a given text as independent tokens, and outputs contextualized representations of
them. Our model is trained using a new pretraining task based on the masked language model of BERT. The task involves
predicting randomly masked words and entities in a large entity-annotated corpus retrieved from Wikipedia. We also
propose an entity-aware self-attention mechanism that is an extension of the self-attention mechanism of the
transformer, and considers the types of tokens (words or entities) when computing attention scores. The proposed model
achieves impressive empirical performance on a wide range of entity-related tasks. In particular, it obtains
state-of-the-art results on five well-known datasets: Open Entity (entity typing), TACRED (relation classification),
CoNLL-2003 (named entity recognition), ReCoRD (cloze-style question answering), and SQuAD 1.1 (extractive question
answering).*

Tips:

- This implementation is the same as :class:`~transformers.RobertaModel` with the addition of entity embeddings as well
as an entity-aware self-attention mechanism, which improves performance on tasks involving reasoning about entities.
- LUKE adds :obj:`entity_ids`, :obj:`entity_attention_mask`, :obj:`entity_token_type_ids` and
:obj:`entity_position_ids` as extra input to the model. Input entities can be special entities (e.g., [MASK]) or
Wikipedia entities (e.g., New York City). You can obtain those using :class:`~transformers.LukeTokenizer`.
- There are 3 head models defined:

  - :class:`~transformers.LukeForEntityClassification`, for tasks such as entity typing (given an entity in a sentence,
    classify it), e.g. the `Open Entity dataset <https://www.cs.utexas.edu/~eunsol/html_pages/open_entity.html>`__. Here,
    one places a linear head on top of the output of the special ``<ent>`` token.
  - :class:`~transformers.LukeForEntityPairClassification`, for tasks such as relation classification (classifying the
    relationship between two entities), e.g. the `TACRED dataset <https://nlp.stanford.edu/projects/tacred/>`__.
  - :class:`~transformers.LukeForEntitySpanClassification`, for tasks such as named-entity recognition (NER). Here, one
    provides all possible entity spans to the model, and each span is then classified appropriately.

Example:

.. code-block::

>>> from transformers import LukeTokenizer, LukeModel

>>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-base")
>>> model = LukeModel.from_pretrained("studio-ousia/luke-base")

# Compute the contextualized entity representation corresponding to the entity mention "Beyoncé"
>>> text = "Beyoncé lives in New York."
>>> entity_spans = [(0, 7)] # character-based entity span corresponding to "Beyoncé"
>>> encoding = tokenizer(text, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt")
>>> outputs = model(**encoding)
>>> word_last_hidden_state = outputs.last_hidden_state
>>> entity_last_hidden_state = outputs.entity_last_hidden_state

# Input Wikipedia entities to obtain enriched contextualized representations.
>>> text = "Beyoncé lives in New York."
>>> entities = ["Beyoncé", "New York City"] # Wikipedia entity titles corresponding to the entity mentions "Beyoncé" and "New York"
>>> entity_spans = [(0, 7), (17, 25)] # character-based entity spans corresponding to "Beyoncé" and "New York"
>>> encoding = tokenizer(text, entities=entities, entity_spans=entity_spans, add_prefix_space=True, return_tensors="pt")
>>> outputs = model(**encoding)
>>> word_last_hidden_state = outputs.last_hidden_state
>>> entity_last_hidden_state = outputs.entity_last_hidden_state
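
The head models listed above are used in the same way. As a minimal sketch, relation classification with
:class:`~transformers.LukeForEntityPairClassification` could look like this, assuming a checkpoint fine-tuned on TACRED is
available under a name such as ``studio-ousia/luke-large-finetuned-tacred`` and that its tokenizer is already configured
for entity-pair inputs:

.. code-block::

    >>> from transformers import LukeTokenizer, LukeForEntityPairClassification

    >>> # "studio-ousia/luke-large-finetuned-tacred" is an assumed checkpoint name
    >>> tokenizer = LukeTokenizer.from_pretrained("studio-ousia/luke-large-finetuned-tacred")
    >>> model = LukeForEntityPairClassification.from_pretrained("studio-ousia/luke-large-finetuned-tacred")

    >>> text = "Beyoncé lives in New York."
    >>> entity_spans = [(0, 7), (17, 25)]  # character-based spans of the head ("Beyoncé") and tail ("New York") entities
    >>> inputs = tokenizer(text, entity_spans=entity_spans, return_tensors="pt")
    >>> outputs = model(**inputs)
    >>> predicted_class_idx = int(outputs.logits[0].argmax())
    >>> print(model.config.id2label[predicted_class_idx])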

This model was contributed by `ikuyamada <https://huggingface.co/ikuyamada>`__ and `nielsr <https://huggingface.co/nielsr>`__.
The original code can be found `here <https://github.com/studio-ousia/luke>`__.


LukeConfig
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeConfig
:members:


LukeTokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeTokenizer
:members: __call__, save_vocabulary


LukeModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeModel
:members: forward


LukeForEntityClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeForEntityClassification
:members: forward


LukeForEntityPairClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeForEntityPairClassification
:members: forward


LukeForEntitySpanClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.LukeForEntitySpanClassification
:members: forward
22 changes: 21 additions & 1 deletion src/transformers/__init__.py
@@ -189,6 +189,7 @@
"models.layoutlm": ["LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP", "LayoutLMConfig", "LayoutLMTokenizer"],
"models.led": ["LED_PRETRAINED_CONFIG_ARCHIVE_MAP", "LEDConfig", "LEDTokenizer"],
"models.longformer": ["LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP", "LongformerConfig", "LongformerTokenizer"],
"models.luke": ["LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP", "LukeConfig", "LukeTokenizer"],
"models.lxmert": ["LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "LxmertConfig", "LxmertTokenizer"],
"models.m2m_100": ["M2M_100_PRETRAINED_CONFIG_ARCHIVE_MAP", "M2M100Config"],
"models.marian": ["MarianConfig"],
@@ -442,8 +443,8 @@
]
_import_structure["generation_utils"] = ["top_k_top_p_filtering"]
_import_structure["modeling_utils"] = ["Conv1D", "PreTrainedModel", "apply_chunking_to_forward", "prune_layer"]
# PyTorch models structure

# PyTorch models structure
_import_structure["models.albert"].extend(
[
"ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -751,6 +752,16 @@
"LongformerSelfAttention",
]
)
_import_structure["models.luke"].extend(
[
"LUKE_PRETRAINED_MODEL_ARCHIVE_LIST",
"LukeForEntityClassification",
"LukeForEntityPairClassification",
"LukeForEntitySpanClassification",
"LukeModel",
"LukePreTrainedModel",
]
)
_import_structure["models.lxmert"].extend(
[
"LxmertEncoder",
@@ -1540,6 +1551,7 @@
from .models.layoutlm import LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP, LayoutLMConfig, LayoutLMTokenizer
from .models.led import LED_PRETRAINED_CONFIG_ARCHIVE_MAP, LEDConfig, LEDTokenizer
from .models.longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig, LongformerTokenizer
from .models.luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig, LukeTokenizer
from .models.lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig, LxmertTokenizer
from .models.m2m_100 import M2M_100_PRETRAINED_CONFIG_ARCHIVE_MAP, M2M100Config
from .models.marian import MarianConfig
@@ -2020,6 +2032,14 @@
LongformerModel,
LongformerSelfAttention,
)
from .models.luke import (
LUKE_PRETRAINED_MODEL_ARCHIVE_LIST,
LukeForEntityClassification,
LukeForEntityPairClassification,
LukeForEntitySpanClassification,
LukeModel,
LukePreTrainedModel,
)
from .models.lxmert import (
LxmertEncoder,
LxmertForPreTraining,
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
@@ -48,6 +48,7 @@
layoutlm,
led,
longformer,
luke,
lxmert,
m2m_100,
marian,
4 changes: 4 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
@@ -47,6 +47,7 @@
from ..layoutlm.configuration_layoutlm import LAYOUTLM_PRETRAINED_CONFIG_ARCHIVE_MAP, LayoutLMConfig
from ..led.configuration_led import LED_PRETRAINED_CONFIG_ARCHIVE_MAP, LEDConfig
from ..longformer.configuration_longformer import LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP, LongformerConfig
from ..luke.configuration_luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig
from ..lxmert.configuration_lxmert import LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP, LxmertConfig
from ..m2m_100.configuration_m2m_100 import M2M_100_PRETRAINED_CONFIG_ARCHIVE_MAP, M2M100Config
from ..marian.configuration_marian import MarianConfig
@@ -86,6 +87,7 @@
for pretrained_map in [
# Add archive maps here
DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP,
LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP,
GPT_NEO_PRETRAINED_CONFIG_ARCHIVE_MAP,
BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP,
MEGATRON_BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -138,6 +140,7 @@
[
# Add configs here
("deit", DeiTConfig),
("luke", LukeConfig),
("gpt_neo", GPTNeoConfig),
("big_bird", BigBirdConfig),
("speech_to_text", Speech2TextConfig),
@@ -196,6 +199,7 @@
[
# Add full (and cased) model names here
("deit", "DeiT"),
("luke", "Luke"),
("gpt_neo", "GPT Neo"),
("big_bird", "BigBird"),
("speech_to_text", "Speech2Text"),
3 changes: 3 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
@@ -166,6 +166,7 @@
LongformerForTokenClassification,
LongformerModel,
)
from ..luke.modeling_luke import LukeModel
from ..lxmert.modeling_lxmert import LxmertForPreTraining, LxmertForQuestionAnswering, LxmertModel
from ..m2m_100.modeling_m2m_100 import M2M100ForConditionalGeneration, M2M100Model
from ..marian.modeling_marian import MarianForCausalLM, MarianModel, MarianMTModel
@@ -308,6 +309,7 @@
LayoutLMConfig,
LEDConfig,
LongformerConfig,
LukeConfig,
LxmertConfig,
M2M100Config,
MarianConfig,
@@ -343,6 +345,7 @@
[
# Base model mapping
(DeiTConfig, DeiTModel),
(LukeConfig, LukeModel),
(GPTNeoConfig, GPTNeoModel),
(BigBirdConfig, BigBirdModel),
(Speech2TextConfig, Speech2TextModel),
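
With the ``(LukeConfig, LukeModel)`` entry added to ``MODEL_MAPPING``, the generic auto classes should now resolve LUKE
checkpoints to the new model class. A quick sketch of the expected behavior, using the ``studio-ousia/luke-base`` checkpoint
referenced in the docs above:

    from transformers import AutoConfig, AutoModel

    # model_type "luke" in the checkpoint's config.json resolves to LukeConfig via CONFIG_MAPPING
    config = AutoConfig.from_pretrained("studio-ousia/luke-base")
    # LukeConfig then resolves to LukeModel via the MODEL_MAPPING entry added here
    model = AutoModel.from_pretrained("studio-ousia/luke-base")
    print(type(config).__name__, type(model).__name__)  # LukeConfig LukeModel
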
4 changes: 3 additions & 1 deletion src/transformers/models/auto/tokenization_auto.py
@@ -41,6 +41,7 @@
from ..layoutlm.tokenization_layoutlm import LayoutLMTokenizer
from ..led.tokenization_led import LEDTokenizer
from ..longformer.tokenization_longformer import LongformerTokenizer
from ..luke.tokenization_luke import LukeTokenizer
from ..lxmert.tokenization_lxmert import LxmertTokenizer
from ..mobilebert.tokenization_mobilebert import MobileBertTokenizer
from ..mpnet.tokenization_mpnet import MPNetTokenizer
@@ -81,6 +82,7 @@
LayoutLMConfig,
LEDConfig,
LongformerConfig,
LukeConfig,
LxmertConfig,
M2M100Config,
MarianConfig,
@@ -232,7 +234,6 @@
(MarianConfig, (MarianTokenizer, None)),
(BlenderbotSmallConfig, (BlenderbotSmallTokenizer, None)),
(BlenderbotConfig, (BlenderbotTokenizer, None)),
(LongformerConfig, (LongformerTokenizer, LongformerTokenizerFast)),
(BartConfig, (BartTokenizer, BartTokenizerFast)),
(LongformerConfig, (LongformerTokenizer, LongformerTokenizerFast)),
(RobertaConfig, (RobertaTokenizer, RobertaTokenizerFast)),
@@ -268,6 +269,7 @@
(IBertConfig, (RobertaTokenizer, RobertaTokenizerFast)),
(Wav2Vec2Config, (Wav2Vec2CTCTokenizer, None)),
(GPTNeoConfig, (GPT2Tokenizer, GPT2TokenizerFast)),
(LukeConfig, (LukeTokenizer, None)),
]
)

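
Since the tokenizer mapping registers ``(LukeConfig, (LukeTokenizer, None))``, ``AutoTokenizer`` should return the slow
``LukeTokenizer`` for LUKE checkpoints (no fast tokenizer is provided). A short sketch:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("studio-ousia/luke-base")
    print(type(tokenizer).__name__)  # LukeTokenizer
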
70 changes: 70 additions & 0 deletions src/transformers/models/luke/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

from ...file_utils import _BaseLazyModule, is_torch_available


_import_structure = {
"configuration_luke": ["LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP", "LukeConfig"],
"tokenization_luke": ["LukeTokenizer"],
}

if is_torch_available():
_import_structure["modeling_luke"] = [
"LUKE_PRETRAINED_MODEL_ARCHIVE_LIST",
"LukeForEntityClassification",
"LukeForEntityPairClassification",
"LukeForEntitySpanClassification",
"LukeModel",
"LukePreTrainedModel",
]


if TYPE_CHECKING:
from .configuration_luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig
from .tokenization_luke import LukeTokenizer

if is_torch_available():
from .modeling_luke import (
LUKE_PRETRAINED_MODEL_ARCHIVE_LIST,
LukeForEntityClassification,
LukeForEntityPairClassification,
LukeForEntitySpanClassification,
LukeModel,
LukePreTrainedModel,
)

else:
import importlib
import os
import sys

class _LazyModule(_BaseLazyModule):
"""
Module class that surfaces all objects but only performs associated imports when the objects are requested.
"""

__file__ = globals()["__file__"]
__path__ = [os.path.dirname(__file__)]

def _get_module(self, module_name: str):
return importlib.import_module("." + module_name, self.__name__)

sys.modules[__name__] = _LazyModule(__name__, _import_structure)
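
The lazy-init pattern above mirrors the other model subpackages: ``configuration_luke`` and ``tokenization_luke`` are always
exposed, ``modeling_luke`` only when torch is installed, and each submodule is imported on first attribute access. A rough
illustration of the intended behavior, assuming PyTorch is available:

    import transformers.models.luke as luke

    print(type(luke).__name__)    # _LazyModule: the package object is replaced in sys.modules
    config_cls = luke.LukeConfig  # configuration_luke is imported only now, on first access
    model_cls = luke.LukeModel    # available because is_torch_available() returned True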