huggingface · sgugger · Sep 9, 2022 · Aug 27, 2022 · Aug 31, 2022 · Sep 7, 2022
diff --git a/docs/source/en/model_doc/ernie.mdx b/docs/source/en/model_doc/ernie.mdx
@@ -0,0 +1,74 @@
+<!--Copyright 2022 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Ernie
+
+## Overview
+ERNIE is a series of powerful models proposed by baidu, especially in Chinese tasks,
+including [ERNIE1.0](https://arxiv.org/abs/1904.09223), [ERNIE2.0](https://ojs.aaai.org/index.php/AAAI/article/view/6428),
+[ERNIE3.0](https://arxiv.org/abs/2107.02137), [ERNIE-Gram](https://arxiv.org/abs/2010.12148), [ERNIE-health](https://arxiv.org/abs/2110.07244), etc.
+
+These models are contributed by [nghuyong](https://huggingface.co/nghuyong) and the official code can be found in [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) (in PaddlePaddle).
+
+## ErnieConfig
+
+[[autodoc]] ErnieConfig
+    - all
+
+## Ernie specific outputs
+
+[[autodoc]] models.ernie.modeling_ernie.ErnieForPreTrainingOutput
+
+## ErnieModel
+
+[[autodoc]] ErnieModel
+    - forward
+
+## ErnieForPreTraining
+
+[[autodoc]] ErnieForPreTraining
+    - forward
+
+## ErnieLMHeadModel
+
+[[autodoc]] ErnieLMHeadModel
+    - forward
+
+## ErnieForMaskedLM
+
+[[autodoc]] ErnieForMaskedLM
+    - forward
+
+## ErnieForNextSentencePrediction
+
+[[autodoc]] ErnieForNextSentencePrediction
+    - forward
+
+## ErnieForSequenceClassification
+
+[[autodoc]] ErnieForSequenceClassification
+    - forward
+
+## ErnieForMultipleChoice
+
+[[autodoc]] ErnieForMultipleChoice
+    - forward
+
+## ErnieForTokenClassification
+
+[[autodoc]] ErnieForTokenClassification
+    - forward
+
+## ErnieForQuestionAnswering
+
+[[autodoc]] ErnieForQuestionAnswering
+    - forward
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -143,6 +143,10 @@
         "BertTokenizer",
         "WordpieceTokenizer",
     ],
+    "models.ernie": [
+        "ERNIE_PRETRAINED_CONFIG_ARCHIVE_MAP",
+        "ErnieConfig",
+    ],
     "models.bert_generation": ["BertGenerationConfig"],
     "models.bert_japanese": ["BertJapaneseTokenizer", "CharacterTokenizer", "MecabTokenizer"],
     "models.bertweet": ["BertweetTokenizer"],
@@ -888,6 +892,22 @@
             "load_tf_weights_in_bert",
         ]
     )
+    _import_structure["models.ernie"].extend(
+        [
+            "ERNIE_PRETRAINED_MODEL_ARCHIVE_LIST",
+            "ErnieForMaskedLM",
+            "ErnieForMultipleChoice",
+            "ErnieForNextSentencePrediction",
+            "ErnieForPreTraining",
+            "ErnieForQuestionAnswering",
+            "ErnieForSequenceClassification",
+            "ErnieForTokenClassification",
+            "ErnieLayer",
+            "ErnieLMHeadModel",
+            "ErnieModel",
+            "ErniePreTrainedModel",
+        ]
+    )
     _import_structure["models.bert_generation"].extend(
         [
             "BertGenerationDecoder",
@@ -2989,6 +3009,10 @@
         BertTokenizer,
         WordpieceTokenizer,
     )
+    from .models.ernie import (
+        ERNIE_PRETRAINED_CONFIG_ARCHIVE_MAP,
+        ErnieConfig,
+    )
     from .models.bert_generation import BertGenerationConfig
     from .models.bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
     from .models.bertweet import BertweetTokenizer
@@ -3630,6 +3654,20 @@
             BertPreTrainedModel,
             load_tf_weights_in_bert,
         )
+        from .models.ernie import (
+            ERNIE_PRETRAINED_MODEL_ARCHIVE_LIST,
+            ErnieForMaskedLM,
+            ErnieForMultipleChoice,
+            ErnieForNextSentencePrediction,
+            ErnieForPreTraining,
+            ErnieForQuestionAnswering,
+            ErnieForSequenceClassification,
+            ErnieForTokenClassification,
+            ErnieLayer,
+            ErnieLMHeadModel,
+            ErnieModel,
+            ErniePreTrainedModel,
+        )
         from .models.bert_generation import (
             BertGenerationDecoder,
             BertGenerationEncoder,

diff --git a/src/transformers/models/__init__.py b/src/transformers/models/__init__.py
@@ -24,6 +24,7 @@
     bartpho,
     beit,
     bert,
+    ernie,
     bert_generation,
     bert_japanese,
     bertweet,

diff --git a/src/transformers/models/auto/configuration_auto.py b/src/transformers/models/auto/configuration_auto.py
@@ -33,6 +33,7 @@
         ("bart", "BartConfig"),
         ("beit", "BeitConfig"),
         ("bert", "BertConfig"),
+        ("ernie", "ErnieConfig"),
         ("bert-generation", "BertGenerationConfig"),
         ("big_bird", "BigBirdConfig"),
         ("bigbird_pegasus", "BigBirdPegasusConfig"),
@@ -162,6 +163,7 @@
         ("bart", "BART_PRETRAINED_CONFIG_ARCHIVE_MAP"),
         ("beit", "BEIT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
         ("bert", "BERT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
+        ("ernie", "ERNIE_PRETRAINED_CONFIG_ARCHIVE_MAP"),
         ("big_bird", "BIG_BIRD_PRETRAINED_CONFIG_ARCHIVE_MAP"),
         ("bigbird_pegasus", "BIGBIRD_PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP"),
         ("blenderbot", "BLENDERBOT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
@@ -278,6 +280,7 @@
         ("bartpho", "BARTpho"),
         ("beit", "BEiT"),
         ("bert", "BERT"),
+        ("ernie", "Ernie"),
         ("bert-generation", "Bert Generation"),
         ("bert-japanese", "BertJapanese"),
         ("bertweet", "BERTweet"),

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
@@ -32,6 +32,7 @@
         ("bart", "BartModel"),
         ("beit", "BeitModel"),
         ("bert", "BertModel"),
+        ("ernie", "ErnieModel"),
         ("bert-generation", "BertGenerationEncoder"),
         ("big_bird", "BigBirdModel"),
         ("bigbird_pegasus", "BigBirdPegasusModel"),
@@ -155,6 +156,7 @@
         ("albert", "AlbertForPreTraining"),
         ("bart", "BartForConditionalGeneration"),
         ("bert", "BertForPreTraining"),
+        ("ernie", "ErnieForPreTraining"),
         ("big_bird", "BigBirdForPreTraining"),
         ("bloom", "BloomForCausalLM"),
         ("camembert", "CamembertForMaskedLM"),
@@ -208,6 +210,7 @@
         ("albert", "AlbertForMaskedLM"),
         ("bart", "BartForConditionalGeneration"),
         ("bert", "BertForMaskedLM"),
+        ("ernie", "ErnieForMaskedLM"),
         ("big_bird", "BigBirdForMaskedLM"),
         ("bigbird_pegasus", "BigBirdPegasusForConditionalGeneration"),
         ("blenderbot-small", "BlenderbotSmallForConditionalGeneration"),
@@ -272,6 +275,7 @@
         # Model for Causal LM mapping
         ("bart", "BartForCausalLM"),
         ("bert", "BertLMHeadModel"),
+        ("ernie", "ErnieLMHeadModel"),
         ("bert-generation", "BertGenerationDecoder"),
         ("big_bird", "BigBirdForCausalLM"),
         ("bigbird_pegasus", "BigBirdPegasusForCausalLM"),
@@ -404,6 +408,7 @@
         ("albert", "AlbertForMaskedLM"),
         ("bart", "BartForConditionalGeneration"),
         ("bert", "BertForMaskedLM"),
+        ("ernie", "ErnieForMaskedLM"),
         ("big_bird", "BigBirdForMaskedLM"),
         ("camembert", "CamembertForMaskedLM"),
         ("convbert", "ConvBertForMaskedLM"),
@@ -489,6 +494,7 @@
         ("albert", "AlbertForSequenceClassification"),
         ("bart", "BartForSequenceClassification"),
         ("bert", "BertForSequenceClassification"),
+        ("ernie", "ErnieForSequenceClassification"),
         ("big_bird", "BigBirdForSequenceClassification"),
         ("bigbird_pegasus", "BigBirdPegasusForSequenceClassification"),
         ("bloom", "BloomForSequenceClassification"),
@@ -547,6 +553,7 @@
         ("albert", "AlbertForQuestionAnswering"),
         ("bart", "BartForQuestionAnswering"),
         ("bert", "BertForQuestionAnswering"),
+        ("ernie", "ErnieForQuestionAnswering"),
         ("big_bird", "BigBirdForQuestionAnswering"),
         ("bigbird_pegasus", "BigBirdPegasusForQuestionAnswering"),
         ("camembert", "CamembertForQuestionAnswering"),
@@ -608,6 +615,7 @@
         # Model for Token Classification mapping
         ("albert", "AlbertForTokenClassification"),
         ("bert", "BertForTokenClassification"),
+        ("ernie", "ErnieForTokenClassification"),
         ("big_bird", "BigBirdForTokenClassification"),
         ("bloom", "BloomForTokenClassification"),
         ("camembert", "CamembertForTokenClassification"),
@@ -651,6 +659,7 @@
         # Model for Multiple Choice mapping
         ("albert", "AlbertForMultipleChoice"),
         ("bert", "BertForMultipleChoice"),
+        ("ernie", "ErnieForMultipleChoice"),
         ("big_bird", "BigBirdForMultipleChoice"),
         ("camembert", "CamembertForMultipleChoice"),
         ("canine", "CanineForMultipleChoice"),
@@ -686,6 +695,7 @@
 MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES = OrderedDict(
     [
         ("bert", "BertForNextSentencePrediction"),
+        ("ernie", "ErnieForNextSentencePrediction"),
         ("fnet", "FNetForNextSentencePrediction"),
         ("megatron-bert", "MegatronBertForNextSentencePrediction"),
         ("mobilebert", "MobileBertForNextSentencePrediction"),

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
@@ -63,6 +63,7 @@
             ),
             ("bartpho", ("BartphoTokenizer", None)),
             ("bert", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
+            ("ernie", ("BertTokenizer", "BertTokenizerFast" if is_tokenizers_available() else None)),
             ("bert-generation", ("BertGenerationTokenizer" if is_sentencepiece_available() else None, None)),
             ("bert-japanese", ("BertJapaneseTokenizer", None)),
             ("bertweet", ("BertweetTokenizer", None)),

diff --git a/src/transformers/models/ernie/__init__.py b/src/transformers/models/ernie/__init__.py
@@ -0,0 +1,88 @@
+# flake8: noqa
+# There's no way to ignore "F401 '...' imported but unused" warnings in this
+# module, but to preserve other warnings. So, don't check this module at all.
+
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    is_tensorflow_text_available,
+    is_torch_available,
+)
+
+
+_import_structure = {
+    "configuration_ernie": ["ERNIE_PRETRAINED_CONFIG_ARCHIVE_MAP", "ErnieConfig", "ErnieOnnxConfig"],
+}
+
+try:
+    if not is_torch_available():
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    pass
+else:
+    _import_structure["modeling_ernie"] = [
+        "ERNIE_PRETRAINED_MODEL_ARCHIVE_LIST",
+        "ErnieForMaskedLM",
+        "ErnieForMultipleChoice",
+        "ErnieForNextSentencePrediction",
+        "ErnieForPreTraining",
+        "ErnieForQuestionAnswering",
+        "ErnieForSequenceClassification",
+        "ErnieForTokenClassification",
+        "ErnieLayer",
+        "ErnieLMHeadModel",
+        "ErnieModel",
+        "ErniePreTrainedModel",
+    ]
+
+try:
+    if not is_tensorflow_text_available():
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    pass
+else:
+    _import_structure["tokenization_ernie_tf"] = ["TFBertTokenizer"]
+
+if TYPE_CHECKING:
+    from .configuration_ernie import ERNIE_PRETRAINED_CONFIG_ARCHIVE_MAP, ErnieConfig, ErnieOnnxConfig
+
+    try:
+        if not is_torch_available():
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        pass
+    else:
+        from .modeling_ernie import (
+            ERNIE_PRETRAINED_MODEL_ARCHIVE_LIST,
+            ErnieForMaskedLM,
+            ErnieForMultipleChoice,
+            ErnieForNextSentencePrediction,
+            ErnieForPreTraining,
+            ErnieForQuestionAnswering,
+            ErnieForSequenceClassification,
+            ErnieForTokenClassification,
+            ErnieLayer,
+            ErnieLMHeadModel,
+            ErnieModel,
+            ErniePreTrainedModel,
+        )
+
+else:
+    import sys
+    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)