Skip to content

Commit

Permalink
Model templates encoder only (huggingface#8509)
Browse files Browse the repository at this point in the history
* Model templates

* TensorFlow

* Remove pooler

* CI

* Tokenizer + Refactoring

* Encoder-Decoder

* Let's go testing

* Encoder-Decoder in TF

* Let's go testing in TF

* Documentation

* README

* Fixes

* Better names

* Style

* Update docs

* Choose to skip either TF or PT

* Code quality fixes

* Add to testing suite

* Update file path

* Cookiecutter path

* Update `transformers` path

* Handle rebasing

* Remove seq2seq from model templates

* Remove s2s config

* Apply Sylvain and Patrick comments

* Apply suggestions from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Last fixes from code review

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
  • Loading branch information
2 people authored and fabiocapsouza committed Nov 15, 2020
1 parent 796e7c4 commit aa2012f
Show file tree
Hide file tree
Showing 29 changed files with 3,328 additions and 1,990 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/self-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ on:
push:
branches:
- master
- model-templates
paths:
- "src/**"
- "tests/**"
- ".github/**"
- "templates/**"
# pull_request:
repository_dispatch:

Expand Down Expand Up @@ -55,6 +57,14 @@ jobs:
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
- name: Create model files
run: |
source .env/bin/activate
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/encoder-bert-tokenizer.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/pt-encoder-bert-tokenizer.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/standalone.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/tf-encoder-bert-tokenizer.json --path=templates/cookiecutter
- name: Run all non-slow tests on GPU
env:
OMP_NUM_THREADS: 1
Expand Down Expand Up @@ -116,6 +126,14 @@ jobs:
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
- name: Create model files
run: |
source .env/bin/activate
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/encoder-bert-tokenizer.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/pt-encoder-bert-tokenizer.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/standalone.json --path=templates/cookiecutter
transformers-cli add-new-model --testing --testing_file=templates/cookiecutter/tests/tf-encoder-bert-tokenizer.json --path=templates/cookiecutter
- name: Run all non-slow tests on GPU
env:
OMP_NUM_THREADS: 1
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,21 @@

extras["tokenizers"] = ["tokenizers==0.9.2"]
extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
extras["modelcreation"] = ["cookiecutter==1.7.2"]

extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]

extras["sentencepiece"] = ["sentencepiece==0.1.91"]
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"]
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"] + extras["modelcreation"]
# sphinx-rtd-theme==0.5.0 introduced big changes in the style.
extras["docs"] = ["recommonmark", "sphinx==3.2.1", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", "sphinx-copybutton"]
extras["quality"] = ["black >= 20.8b1", "isort >= 5.5.4", "flake8 >= 3.8.3"]


extras["all"] = extras["tf"] + extras["torch"] + extras["flax"] + extras["sentencepiece"] + extras["tokenizers"]

extras["dev"] = extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["docs"] + extras["sklearn"]
extras["dev"] = extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["docs"] + extras["sklearn"] + extras["modelcreation"]


setup(
Expand Down
192 changes: 192 additions & 0 deletions src/transformers/commands/add_new_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
import json
import os
import shutil
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import List

from cookiecutter.main import cookiecutter
from transformers.commands import BaseTransformersCLICommand

from ..utils import logging


logger = logging.get_logger(__name__) # pylint: disable=invalid-name


def add_new_model_command_factory(args: Namespace):
    """Build the ``add-new-model`` command from parsed CLI arguments.

    Args:
        args: Parsed arguments exposing ``testing``, ``testing_file`` and ``path``.

    Returns:
        An :class:`AddNewModelCommand` configured from ``args``.
    """
    testing_mode = args.testing
    testing_configuration_file = args.testing_file
    return AddNewModelCommand(testing_mode, testing_configuration_file, path=args.path)


class AddNewModelCommand(BaseTransformersCLICommand):
    """``transformers-cli add-new-model`` command.

    Runs the cookiecutter model template, moves the generated files into the repository tree and inserts the
    generated snippets into existing files.
    """

    @staticmethod
    def register_subcommand(parser: ArgumentParser):
        """Register the ``add-new-model`` sub-command and its options on ``parser``.

        Args:
            parser: The sub-parsers object on which ``add_parser`` is called.
        """
        add_new_model_parser = parser.add_parser("add-new-model")
        add_new_model_parser.add_argument("--testing", action="store_true", help="If in testing mode.")
        add_new_model_parser.add_argument("--testing_file", type=str, help="Configuration file on which to run.")
        add_new_model_parser.add_argument(
            "--path", type=str, help="Path to cookiecutter. Should only be used for testing purposes."
        )
        add_new_model_parser.set_defaults(func=add_new_model_command_factory)

    def __init__(self, testing: bool, testing_file: str, path=None, *args):
        """Store the command options.

        Args:
            testing: Whether to run non-interactively from a configuration file.
            testing_file: Path to the JSON configuration used when ``testing`` is true.
            path: Optional path to the cookiecutter template directory.
            *args: Accepted for call compatibility and ignored.
        """
        self._testing = testing
        self._testing_file = testing_file
        self._path = path

    def run(self):
        """Generate a new model from the cookiecutter template and install its files.

        Steps: run cookiecutter in the current working directory, read the generated ``configuration.json``,
        move the configuration/modeling/test/doc/tokenization files under the transformers root, apply the
        snippets listed in ``to_replace_<model>.py`` and remove the (then empty) generated directory.

        Raises:
            ValueError: If a ``cookiecutter-template-*`` directory already exists in the working directory, if
                testing mode is requested without a testing file, if a snippet's anchor line is missing from
                its target file, or if the snippet file closes a snippet before naming its target and anchor.
        """
        from os import fdopen, remove
        from shutil import copymode, move
        from tempfile import mkstemp

        template_prefix = "cookiecutter-template-"

        # The generated directory is located by prefix after cookiecutter runs, so any pre-existing match
        # would make that lookup ambiguous.
        if any(entry.startswith(template_prefix) for entry in os.listdir()):
            raise ValueError(
                "Found one or more directories starting with `cookiecutter-template-` in current working "
                "directory. Please clean your directory by removing all folders starting with "
                "`cookiecutter-template-` or change your working directory."
            )

        # Fail early with a clear message instead of a TypeError from `open(None)` further down.
        if self._testing and self._testing_file is None:
            raise ValueError("`--testing_file` must be provided when `--testing` is set.")

        path_to_transformer_root = (
            Path(__file__).parent.parent.parent.parent if self._path is None else Path(self._path).parent.parent
        )
        path_to_cookiecutter = path_to_transformer_root / "templates" / "cookiecutter"

        # Execute cookiecutter
        if not self._testing:
            cookiecutter(str(path_to_cookiecutter))
        else:
            with open(self._testing_file, "r", encoding="utf-8") as configuration_file:
                testing_configuration = json.load(configuration_file)

            cookiecutter(
                str(path_to_cookiecutter if self._path is None else self._path),
                no_input=True,
                extra_context=testing_configuration,
            )

        directory = [entry for entry in os.listdir() if entry.startswith(template_prefix)][0]

        # Retrieve configuration
        with open(f"{directory}/configuration.json", "r", encoding="utf-8") as configuration_file:
            configuration = json.load(configuration_file)

        lowercase_model_name = configuration["lowercase_modelname"]
        pytorch_or_tensorflow = configuration["generate_tensorflow_and_pytorch"]
        os.remove(f"{directory}/configuration.json")

        output_pytorch = "PyTorch" in pytorch_or_tensorflow
        output_tensorflow = "TensorFlow" in pytorch_or_tensorflow

        shutil.move(
            f"{directory}/configuration_{lowercase_model_name}.py",
            f"{path_to_transformer_root}/src/transformers/configuration_{lowercase_model_name}.py",
        )

        def remove_copy_lines(path):
            """Rewrite ``path`` without the lines containing ``# Copied from transformers.``."""
            with open(path, "r", encoding="utf-8") as f:
                lines = f.readlines()
            with open(path, "w", encoding="utf-8") as f:
                f.writelines(line for line in lines if "# Copied from transformers." not in line)

        # PyTorch files use the `modeling_` prefix, TensorFlow files `modeling_tf_`; both frameworks are
        # otherwise handled identically: install the files when requested, delete them when not.
        for file_prefix, requested in (("modeling", output_pytorch), ("modeling_tf", output_tensorflow)):
            model_file = f"{file_prefix}_{lowercase_model_name}.py"
            test_file = f"test_{file_prefix}_{lowercase_model_name}.py"

            if requested:
                if not self._testing:
                    remove_copy_lines(f"{directory}/{model_file}")

                shutil.move(f"{directory}/{model_file}", f"{path_to_transformer_root}/src/transformers/{model_file}")
                shutil.move(f"{directory}/{test_file}", f"{path_to_transformer_root}/tests/{test_file}")
            else:
                os.remove(f"{directory}/{model_file}")
                os.remove(f"{directory}/{test_file}")

        shutil.move(
            f"{directory}/{lowercase_model_name}.rst",
            f"{path_to_transformer_root}/docs/source/model_doc/{lowercase_model_name}.rst",
        )

        shutil.move(
            f"{directory}/tokenization_{lowercase_model_name}.py",
            f"{path_to_transformer_root}/src/transformers/tokenization_{lowercase_model_name}.py",
        )

        def replace(original_file: str, line_to_copy_below: str, lines_to_copy: List[str]):
            """Insert ``lines_to_copy`` after every line of ``original_file`` containing ``line_to_copy_below``.

            Raises:
                ValueError: If no line of ``original_file`` contains ``line_to_copy_below``.
            """
            # Create temp file
            fh, abs_path = mkstemp()
            line_found = False
            try:
                with fdopen(fh, "w", encoding="utf-8") as new_file, open(
                    original_file, encoding="utf-8"
                ) as old_file:
                    for line in old_file:
                        new_file.write(line)
                        if line_to_copy_below in line:
                            line_found = True
                            new_file.writelines(lines_to_copy)

                if not line_found:
                    raise ValueError(f"Line {line_to_copy_below} was not found in file.")
            except Exception:
                # The original file is still untouched at this point; only the temp file needs cleaning up.
                remove(abs_path)
                raise

            # Copy the file permissions from the old file to the new file
            copymode(original_file, abs_path)
            # Remove original file
            remove(original_file)
            # Move new file
            move(abs_path, original_file)

        def skip_units(line):
            """Return True when ``line`` is tagged for a framework that is not being generated."""
            return ("generating PyTorch" in line and not output_pytorch) or (
                "generating TensorFlow" in line and not output_tensorflow
            )

        def replace_in_files(path_to_datafile):
            """Apply every snippet described in ``path_to_datafile``, then delete that file.

            Marker lines: ``# To replace in: "<file>"`` selects the target file, ``# Below: "<line>"`` selects
            the anchor line, ``# Replace with`` starts the snippet and ``# End.`` applies it. Any line
            containing ``##`` is ignored.
            """
            with open(path_to_datafile, encoding="utf-8") as datafile:
                lines_to_copy = []
                skip_file = False
                skip_snippet = False
                file_to_replace_in = None
                line_to_copy_below = None
                for line in datafile:
                    if "##" in line:
                        continue

                    if "# To replace in: " in line:
                        file_to_replace_in = line.split('"')[1]
                        skip_file = skip_units(line)
                    elif "# Below: " in line:
                        line_to_copy_below = line.split('"')[1]
                        skip_snippet = skip_units(line)
                    elif "# End." in line:
                        if not skip_file and not skip_snippet:
                            if file_to_replace_in is None or line_to_copy_below is None:
                                raise ValueError(
                                    f"Malformed snippet in {path_to_datafile}: `# End.` found before "
                                    "`# To replace in:` and `# Below:` markers."
                                )
                            replace(file_to_replace_in, line_to_copy_below, lines_to_copy)

                        lines_to_copy = []
                    elif "# Replace with" in line:
                        lines_to_copy = []
                    else:
                        lines_to_copy.append(line)

            remove(path_to_datafile)

        replace_in_files(f"{directory}/to_replace_{lowercase_model_name}.py")
        # `os.rmdir` only succeeds on an empty directory, so this also checks every generated file was consumed.
        os.rmdir(directory)
2 changes: 2 additions & 0 deletions src/transformers/commands/transformers_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
from argparse import ArgumentParser

from transformers.commands.add_new_model import AddNewModelCommand
from transformers.commands.convert import ConvertCommand
from transformers.commands.download import DownloadCommand
from transformers.commands.env import EnvironmentCommand
Expand All @@ -20,6 +21,7 @@ def main():
RunCommand.register_subcommand(commands_parser)
ServeCommand.register_subcommand(commands_parser)
UserCommands.register_subcommand(commands_parser)
AddNewModelCommand.register_subcommand(commands_parser)

# Let's go
args = parser.parse_args()
Expand Down
3 changes: 3 additions & 0 deletions src/transformers/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
ALL_PRETRAINED_CONFIG_ARCHIVE_MAP = dict(
(key, value)
for pretrained_map in [
# Add archive maps here
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
BART_PRETRAINED_CONFIG_ARCHIVE_MAP,
BLENDERBOT_PRETRAINED_CONFIG_ARCHIVE_MAP,
Expand Down Expand Up @@ -95,6 +96,7 @@

CONFIG_MAPPING = OrderedDict(
[
# Add configs here
("retribert", RetriBertConfig),
("t5", T5Config),
("mobilebert", MobileBertConfig),
Expand Down Expand Up @@ -136,6 +138,7 @@

MODEL_NAMES_MAPPING = OrderedDict(
[
# Add full (and cased) model names here
("retribert", "RetriBERT"),
("t5", "T5"),
("mobilebert", "MobileBERT"),
Expand Down
12 changes: 12 additions & 0 deletions src/transformers/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,14 @@
from .utils import logging


# Add modeling imports here

logger = logging.get_logger(__name__)


MODEL_MAPPING = OrderedDict(
[
# Base model mapping
(RetriBertConfig, RetriBertModel),
(T5Config, T5Model),
(DistilBertConfig, DistilBertModel),
Expand Down Expand Up @@ -266,6 +269,7 @@

MODEL_FOR_PRETRAINING_MAPPING = OrderedDict(
[
# Model for pre-training mapping
(LayoutLMConfig, LayoutLMForMaskedLM),
(RetriBertConfig, RetriBertModel),
(T5Config, T5ForConditionalGeneration),
Expand Down Expand Up @@ -295,6 +299,7 @@

MODEL_WITH_LM_HEAD_MAPPING = OrderedDict(
[
# Model with LM heads mapping
(LayoutLMConfig, LayoutLMForMaskedLM),
(T5Config, T5ForConditionalGeneration),
(DistilBertConfig, DistilBertForMaskedLM),
Expand Down Expand Up @@ -325,6 +330,7 @@

MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
[
# Model for Causal LM mapping
(CamembertConfig, CamembertForCausalLM),
(XLMRobertaConfig, XLMRobertaForCausalLM),
(RobertaConfig, RobertaForCausalLM),
Expand All @@ -347,6 +353,7 @@

MODEL_FOR_MASKED_LM_MAPPING = OrderedDict(
[
# Model for Masked LM mapping
(LayoutLMConfig, LayoutLMForMaskedLM),
(DistilBertConfig, DistilBertForMaskedLM),
(AlbertConfig, AlbertForMaskedLM),
Expand All @@ -368,6 +375,7 @@

MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = OrderedDict(
[
# Model for Seq2Seq Causal LM mapping
(T5Config, T5ForConditionalGeneration),
(PegasusConfig, PegasusForConditionalGeneration),
(MarianConfig, MarianMTModel),
Expand All @@ -383,6 +391,7 @@

MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Sequence Classification mapping
(DistilBertConfig, DistilBertForSequenceClassification),
(AlbertConfig, AlbertForSequenceClassification),
(CamembertConfig, CamembertForSequenceClassification),
Expand All @@ -407,6 +416,7 @@

MODEL_FOR_QUESTION_ANSWERING_MAPPING = OrderedDict(
[
# Model for Question Answering mapping
(DistilBertConfig, DistilBertForQuestionAnswering),
(AlbertConfig, AlbertForQuestionAnswering),
(CamembertConfig, CamembertForQuestionAnswering),
Expand All @@ -429,6 +439,7 @@

MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = OrderedDict(
[
# Model for Token Classification mapping
(LayoutLMConfig, LayoutLMForTokenClassification),
(DistilBertConfig, DistilBertForTokenClassification),
(CamembertConfig, CamembertForTokenClassification),
Expand All @@ -450,6 +461,7 @@

MODEL_FOR_MULTIPLE_CHOICE_MAPPING = OrderedDict(
[
# Model for Multiple Choice mapping
(CamembertConfig, CamembertForMultipleChoice),
(ElectraConfig, ElectraForMultipleChoice),
(XLMRobertaConfig, XLMRobertaForMultipleChoice),
Expand Down
Loading

0 comments on commit aa2012f

Please sign in to comment.