Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] implement support for run-time dependency version checking #8645

Merged
merged 24 commits into from
Nov 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: modified_only_fixup extra_quality_checks quality style fixup fix-copies test test-examples docs
.PHONY: deps_table_update modified_only_fixup extra_quality_checks quality style fixup fix-copies test test-examples docs


check_dirs := examples tests src utils
Expand All @@ -14,9 +14,14 @@ modified_only_fixup:
echo "No library .py files were modified"; \
fi

# Update src/transformers/dependency_versions_table.py

deps_table_update:
@python setup.py deps_table_update
stas00 marked this conversation as resolved.
Show resolved Hide resolved

# Check that source code meets quality standards

extra_quality_checks:
extra_quality_checks: deps_table_update
python utils/check_copies.py
python utils/check_dummies.py
python utils/check_repo.py
Expand All @@ -32,7 +37,7 @@ quality:

# Format source code automatically and check is there are any problems left that need manual fixing

style:
style: deps_table_update
black $(check_dirs)
isort $(check_dirs)
python utils/style_doc.py src/transformers docs/source --max_len 119
Expand Down
15 changes: 2 additions & 13 deletions examples/lightning_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
from pathlib import Path
from typing import Any, Dict

import packaging
import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_info

import pkg_resources
from transformers import (
AdamW,
AutoConfig,
Expand All @@ -30,21 +28,12 @@
get_linear_schedule_with_warmup,
get_polynomial_decay_schedule_with_warmup,
)
from transformers.utils.versions import require_version_examples


logger = logging.getLogger(__name__)


def require_min_ver(pkg, min_ver):
got_ver = pkg_resources.get_distribution(pkg).version
if packaging.version.parse(got_ver) < packaging.version.parse(min_ver):
logger.warning(
f"{pkg}>={min_ver} is required for a normal functioning of this module, but found {pkg}=={got_ver}. "
"Try: pip install -r examples/requirements.txt"
)


require_min_ver("pytorch_lightning", "1.0.4")
require_version_examples("pytorch_lightning>=1.0.4")

MODEL_MODES = {
"base": AutoModel,
Expand Down
199 changes: 147 additions & 52 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
"""

import os
import re
import shutil
from distutils.core import Command
from pathlib import Path

from setuptools import find_packages, setup
Expand All @@ -69,54 +71,163 @@
shutil.rmtree(stale_egg_info)


extras = {}

extras["ja"] = ["fugashi>=1.0", "ipadic>=1.0.0,<2.0", "unidic_lite>=1.0.7", "unidic>=1.0.2"]
extras["sklearn"] = ["scikit-learn"]

# keras2onnx and onnxconverter-common version is specific through a commit until 1.7.0 lands on pypi
extras["tf"] = [
"tensorflow>=2.0",
# IMPORTANT:
# 1. all dependencies should be listed here with their version requirements if any
# 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
_deps = [
"black>=20.8b1",
"cookiecutter==1.7.2",
"dataclasses",
"datasets",
"faiss-cpu",
"fastapi",
"filelock",
"flake8>=3.8.3",
"flax==0.2.2",
"fugashi>=1.0",
"ipadic>=1.0.0,<2.0",
"isort>=5.5.4",
"jax>=0.2.0",
"jaxlib==0.1.55",
"keras2onnx",
"numpy",
"onnxconverter-common",
"keras2onnx"
# "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
# "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
]
extras["tf-cpu"] = [
"onnxruntime-tools>=1.4.2",
"onnxruntime>=1.4.0",
"packaging",
"parameterized",
"protobuf",
"psutil",
"pydantic",
"pytest",
"pytest-xdist",
"python>=3.6.0",
"recommonmark",
"regex!=2019.12.17",
"requests",
"sacremoses",
"scikit-learn",
"sentencepiece==0.1.91",
"sphinx-copybutton",
"sphinx-markdown-tables",
"sphinx-rtd-theme==0.4.3", # sphinx-rtd-theme==0.5.0 introduced big changes in the style.
"sphinx==3.2.1",
"starlette",
"tensorflow-cpu>=2.0",
"onnxconverter-common",
"keras2onnx"
# "onnxconverter-common @ git+git://github.com/microsoft/onnxconverter-common.git@f64ca15989b6dc95a1f3507ff6e4c395ba12dff5#egg=onnxconverter-common",
# "keras2onnx @ git+git://github.com/onnx/keras-onnx.git@cbdc75cb950b16db7f0a67be96a278f8d2953b48#egg=keras2onnx",
"tensorflow>=2.0",
"timeout-decorator",
"tokenizers==0.9.4",
"torch>=1.0",
"tqdm>=4.27",
"unidic>=1.0.2",
"unidic_lite>=1.0.7",
"uvicorn",
]
extras["torch"] = ["torch>=1.0"]


# tokenizers: "tokenizers==0.9.4" lookup table
# support non-versions file too so that they can be checked at run time
deps = {b: a for a, b in (re.findall(r"^(([^!=<>]+)(?:[!=<>].*)?$)", x)[0] for x in _deps)}


def deps_list(*pkgs):
return [deps[pkg] for pkg in pkgs]


class DepsTableUpdateCommand(Command):
"""
A custom distutils command that updates the dependency table.
usage: python setup.py deps_table_update
"""

description = "build runtime dependency table"
user_options = [
# format: (long option, short option, description).
("dep-table-update", None, "updates src/transformers/dependency_versions_table.py"),
]

def initialize_options(self):
pass

def finalize_options(self):
pass

def run(self):
entries = "\n".join([f' "{k}": "{v}",' for k, v in deps.items()])
content = [
"# THIS FILE HAS BEEN AUTOGENERATED. To update:",
"# 1. modify the `_deps` dict in setup.py",
"# 2. run `make deps_table_update``",
"deps = {",
entries,
"}",
""
]
target = "src/transformers/dependency_versions_table.py"
print(f"updating {target}")
with open(target, "w") as f:
f.write("\n".join(content))


extras = {}

extras["ja"] = deps_list("fugashi", "ipadic", "unidic_lite", "unidic")
extras["sklearn"] = deps_list("scikit-learn")

extras["tf"] = deps_list("tensorflow", "onnxconverter-common", "keras2onnx")
extras["tf-cpu"] = deps_list("tensorflow-cpu", "onnxconverter-common", "keras2onnx")

extras["torch"] = deps_list("torch")

if os.name == "nt": # windows
extras["retrieval"] = ["datasets"] # faiss is not supported on windows
extras["flax"] = [] # jax is not supported on windows
extras["retrieval"] = deps_list("datasets") # faiss is not supported on windows
extras["flax"] = [] # jax is not supported on windows
else:
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["flax"] = ["jaxlib==0.1.55", "jax>=0.2.0", "flax==0.2.2"]

extras["tokenizers"] = ["tokenizers==0.9.4"]
extras["onnxruntime"] = ["onnxruntime>=1.4.0", "onnxruntime-tools>=1.4.2"]
extras["modelcreation"] = ["cookiecutter==1.7.2"]
extras["retrieval"] = deps_list("faiss-cpu", "datasets")
extras["flax"] = deps_list("jax", "jaxlib", "flax")

extras["serving"] = ["pydantic", "uvicorn", "fastapi", "starlette"]
extras["tokenizers"] = deps_list("tokenizers")
extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
extras["modelcreation"] = deps_list("cookiecutter")

extras["sentencepiece"] = ["sentencepiece==0.1.91", "protobuf"]
extras["retrieval"] = ["faiss-cpu", "datasets"]
extras["testing"] = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil"] + extras["retrieval"] + extras["modelcreation"]
# sphinx-rtd-theme==0.5.0 introduced big changes in the style.
extras["docs"] = ["recommonmark", "sphinx==3.2.1", "sphinx-markdown-tables", "sphinx-rtd-theme==0.4.3", "sphinx-copybutton"]
extras["quality"] = ["black >= 20.8b1", "isort >= 5.5.4", "flake8 >= 3.8.3"]
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")

extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
extras["retrieval"] = deps_list("faiss-cpu", "datasets")
extras["testing"] = (
deps_list("pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil")
+ extras["retrieval"]
+ extras["modelcreation"]
)
extras["docs"] = deps_list("recommonmark", "sphinx", "sphinx-markdown-tables", "sphinx-rtd-theme", "sphinx-copybutton")
extras["quality"] = deps_list("black", "isort", "flake8")

extras["all"] = extras["tf"] + extras["torch"] + extras["flax"] + extras["sentencepiece"] + extras["tokenizers"]

extras["dev"] = extras["all"] + extras["testing"] + extras["quality"] + extras["ja"] + extras["docs"] + extras["sklearn"] + extras["modelcreation"]
extras["dev"] = (
extras["all"]
+ extras["testing"]
+ extras["quality"]
+ extras["ja"]
+ extras["docs"]
+ extras["sklearn"]
+ extras["modelcreation"]
)


# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
install_requires = [
deps["dataclasses"] + ";python_version<'3.7'", # dataclasses for Python versions that don't have it
deps["filelock"], # filesystem locks, e.g., to prevent parallel downloads
deps["numpy"],
deps["packaging"], # utilities from PyPA to e.g., compare versions
deps["regex"], # for OpenAI GPT
deps["requests"], # for downloading models over HTTPS
deps["sacremoses"], # for XLM
deps["tokenizers"],
deps["tqdm"], # progress bars in model download and training scripts
]

setup(
name="transformers",
version="4.0.0-dev",
Expand All @@ -130,27 +241,10 @@
url="https://github.com/huggingface/transformers",
package_dir={"": "src"},
packages=find_packages("src"),
install_requires=[
"numpy",
"tokenizers == 0.9.4",
# dataclasses for Python versions that don't have it
"dataclasses;python_version<'3.7'",
# utilities from PyPA to e.g. compare versions
"packaging",
# filesystem locks e.g. to prevent parallel downloads
"filelock",
# for downloading models over HTTPS
"requests",
# progress bars in model download and training scripts
"tqdm >= 4.27",
# for OpenAI GPT
"regex != 2019.12.17",
# for XLM
"sacremoses",
],
extras_require=extras,
entry_points={"console_scripts": ["transformers-cli=transformers.commands.transformers_cli:main"]},
python_requires=">=3.6.0",
install_requires=install_requires,
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
Expand All @@ -163,4 +257,5 @@
"Programming Language :: Python :: 3.7",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
cmdclass={"deps_table_update": DepsTableUpdateCommand},
)
21 changes: 12 additions & 9 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,7 @@
absl.logging.set_stderrthreshold("info")
absl.logging._warn_preinit_stderr = False

# Integrations: this needs to come before other ml imports
# in order to allow any 3rd-party code to initialize properly
from .integrations import ( # isort:skip
is_comet_available,
is_optuna_available,
is_ray_available,
is_tensorboard_available,
is_wandb_available,
)
from . import dependency_versions_check

# Configuration
from .configuration_utils import PretrainedConfig
Expand Down Expand Up @@ -203,6 +195,17 @@
)


# Integrations: this needs to come before other ml imports
# in order to allow any 3rd-party code to initialize properly
from .integrations import ( # isort:skip
is_comet_available,
is_optuna_available,
is_ray_available,
is_tensorboard_available,
is_wandb_available,
)


if is_sentencepiece_available():
from .models.albert import AlbertTokenizer
from .models.bert_generation import BertGenerationTokenizer
Expand Down
28 changes: 28 additions & 0 deletions src/transformers/dependency_versions_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import sys

from .dependency_versions_table import deps
from .utils.versions import require_version_core


# define which module versions we always want to check at run time
# (usually the ones defined in `install_requires` in setup.py)
#
# order specific notes:
# - tqdm must be checked before tokenizers

pkgs_to_check_at_runtime = "python tqdm regex sacremoses requests packaging filelock numpy tokenizers".split()
if sys.version_info < (3, 7):
pkgs_to_check_at_runtime.append("dataclasses")

for pkg in pkgs_to_check_at_runtime:
if pkg in deps:
if pkg == "tokenizers":
# must be loaded here, or else tqdm check may fail
from .file_utils import is_tokenizers_available

if not is_tokenizers_available():
continue # not required, check version only if installed

require_version_core(deps[pkg])
else:
raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py")
Loading