From 17bb53f739e5eee81764e3c780d1bc924ec78a70 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 14:42:17 -0700 Subject: [PATCH 1/5] make fairscale and deepspeed setup extras --- docs/source/main_classes/trainer.rst | 20 +++++++++++++++++-- setup.py | 4 ++++ src/transformers/dependency_versions_check.py | 6 +++++- src/transformers/dependency_versions_table.py | 2 ++ src/transformers/integrations.py | 4 ++-- src/transformers/trainer.py | 2 ++ src/transformers/utils/versions.py | 6 ++++++ 7 files changed, 39 insertions(+), 5 deletions(-) diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index bc9f248827ad6a..d99ca0e69df242 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -272,7 +272,15 @@ Install the library via pypi: .. code-block:: bash - pip install fairscale + pip install fairscale + +or via ``transformers``' ``extras``: + +.. code-block:: bash + + pip install transformers[fairscale] + +(will become available starting from ``transformers==4.6.0``) or find more details on `the FairScale's GitHub page `__. @@ -417,7 +425,15 @@ Install the library via pypi: .. code-block:: bash - pip install deepspeed + pip install deepspeed + +or via ``transformers``' ``extras``: + +.. code-block:: bash + + pip install transformers[deepspeed] + +(will become available starting from ``transformers==4.6.0``) or find more details on `the DeepSpeed's GitHub page `__ and `advanced install `__. diff --git a/setup.py b/setup.py index c3583a30700980..1142fd19838d86 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,9 @@ "cookiecutter==1.7.2", "dataclasses", "datasets", + "deepspeed>0.3.13", "docutils==0.16.0", + "fairscale>0.3", "faiss-cpu", "fastapi", "filelock", @@ -233,6 +235,8 @@ def run(self): extras["modelcreation"] = deps_list("cookiecutter") extras["sagemaker"] = deps_list("sagemaker") +extras["deepspeed"] = deps_list("deepspeed") +extras["fairscale"] = deps_list("fairscale") extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette") extras["speech"] = deps_list("soundfile", "torchaudio") diff --git a/src/transformers/dependency_versions_check.py b/src/transformers/dependency_versions_check.py index 7e36aaef3091ba..b33a7e0da1be76 100644 --- a/src/transformers/dependency_versions_check.py +++ b/src/transformers/dependency_versions_check.py @@ -14,7 +14,7 @@ import sys from .dependency_versions_table import deps -from .utils.versions import require_version_core +from .utils.versions import require_version, require_version_core # define which module versions we always want to check at run time @@ -41,3 +41,7 @@ require_version_core(deps[pkg]) else: raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") + + +def dep_version_check(pkg, hint=""): + require_version(deps[pkg], hint) diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 43f4c028feca57..bd070d7bdf254f 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -7,7 +7,9 @@ "cookiecutter": "cookiecutter==1.7.2", "dataclasses": "dataclasses", "datasets": "datasets", + "deepspeed": "deepspeed>0.3.13", "docutils": "docutils==0.16.0", + "fairscale": "fairscale>0.3", "faiss-cpu": "faiss-cpu", "fastapi": "fastapi", "filelock": "filelock", diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 65824c25ca7468..7e4ab0f5c7a100 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -24,8 +24,8 @@ from copy import deepcopy from pathlib import Path +from .dependency_versions_check import dep_version_check from .utils import logging -from .utils.versions import require_version logger = logging.get_logger(__name__) @@ -324,7 +324,7 @@ def deepspeed_parse_config(ds_config): If it's already a dict, return a copy of it, so that we can freely modify it. """ - require_version("deepspeed>0.3.13") + dep_version_check("deepspeed") if isinstance(ds_config, dict): # Don't modify user's data should they want to reuse it (e.g. in tests), because once we diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index dc311643310bf6..90798ac330706b 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -54,6 +54,7 @@ from torch.utils.data.sampler import RandomSampler, SequentialSampler from .data.data_collator import DataCollator, DataCollatorWithPadding, default_data_collator +from .dependency_versions_check import dep_version_check from .file_utils import ( WEIGHTS_NAME, is_apex_available, @@ -139,6 +140,7 @@ import torch_xla.distributed.parallel_loader as pl if is_fairscale_available(): + dep_version_check("fairscale") import fairscale from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP from fairscale.optim import OSS diff --git a/src/transformers/utils/versions.py b/src/transformers/utils/versions.py index b573a361b96ff7..73151487bc71f2 100644 --- a/src/transformers/utils/versions.py +++ b/src/transformers/utils/versions.py @@ -60,6 +60,12 @@ def require_version(requirement: str, hint: Optional[str] = None) -> None: Args: requirement (:obj:`str`): pip style definition, e.g., "tokenizers==0.9.4", "tqdm>=4.27", "numpy" hint (:obj:`str`, `optional`): what suggestion to print in case of requirements not being met + + Example:: + + require_version("pandas>1.1.2") + require_version("numpy>1.18.5", "this is important to have for whatever reason") + """ hint = f"\n{hint}" if hint is not None else "" From a86762dc3e203265bfa0fa6a87fc61f1b539cc38 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 14:47:36 -0700 Subject: [PATCH 2/5] fix default --- src/transformers/dependency_versions_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/dependency_versions_check.py b/src/transformers/dependency_versions_check.py index b33a7e0da1be76..e6e676481d79c9 100644 --- a/src/transformers/dependency_versions_check.py +++ b/src/transformers/dependency_versions_check.py @@ -43,5 +43,5 @@ raise ValueError(f"can't find {pkg} in {deps.keys()}, check dependency_versions_table.py") -def dep_version_check(pkg, hint=""): +def dep_version_check(pkg, hint=None): require_version(deps[pkg], hint) From a29fd4d0f8ae68db0e2bf857701eace54a75e838 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 15:03:37 -0700 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- docs/source/main_classes/trainer.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/main_classes/trainer.rst b/docs/source/main_classes/trainer.rst index d99ca0e69df242..10a7a9d54aa3bf 100644 --- a/docs/source/main_classes/trainer.rst +++ b/docs/source/main_classes/trainer.rst @@ -272,13 +272,13 @@ Install the library via pypi: .. code-block:: bash - pip install fairscale + pip install fairscale or via ``transformers``' ``extras``: .. code-block:: bash - pip install transformers[fairscale] + pip install transformers[fairscale] (will become available starting from ``transformers==4.6.0``) @@ -425,13 +425,13 @@ Install the library via pypi: .. code-block:: bash - pip install deepspeed + pip install deepspeed or via ``transformers``' ``extras``: .. code-block:: bash - pip install transformers[deepspeed] + pip install transformers[deepspeed] (will become available starting from ``transformers==4.6.0``) From 3459421c0daecc72ce9ca7e1742e24492c3f0951 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 15:29:47 -0700 Subject: [PATCH 4/5] no reason not to ask for the good version --- src/transformers/trainer.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 90798ac330706b..41800b7fd3a32c 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -142,16 +142,12 @@ if is_fairscale_available(): dep_version_check("fairscale") import fairscale + from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP + from fairscale.nn.wrap import auto_wrap from fairscale.optim import OSS from fairscale.optim.grad_scaler import ShardedGradScaler - if version.parse(fairscale.__version__) >= version.parse("0.3"): - from fairscale.nn.data_parallel import FullyShardedDataParallel as FullyShardedDDP - from fairscale.nn.wrap import auto_wrap - else: - FullyShardedDDP = None - if is_sagemaker_dp_enabled(): import smdistributed.dataparallel.torch.distributed as dist from smdistributed.dataparallel.torch.parallel.distributed import DistributedDataParallel as DDP From aa0b87bd28c17b8fd1611243ae5e04464e0cc01b Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 8 Apr 2021 15:31:16 -0700 Subject: [PATCH 5/5] update the CIs --- .github/workflows/self-scheduled.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index c49a967d2aba72..978d9e02a69d38 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -33,8 +33,7 @@ jobs: run: | apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip - pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] - pip install deepspeed + pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed] - name: Are GPUs recognized by our DL frameworks run: | @@ -156,9 +155,7 @@ jobs: run: | apt -y update && apt install -y libsndfile1-dev pip install --upgrade pip - pip install .[sklearn,testing,onnxruntime,sentencepiece,speech] - pip install fairscale - pip install deepspeed + pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,deepspeed,fairscale] - name: Are GPUs recognized by our DL frameworks run: |