From b47f8f1086928720bc59cec983946472cef8b21a Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Tue, 26 Mar 2024 12:16:40 -0400
Subject: [PATCH 1/8] Docstring to note about zero init

---
 src/transformers/training_args.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index a52a77e9a766d6..61b7fd766ea53e 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -503,6 +503,11 @@ class TrainingArguments:
             evolve in the future. The value is either the location of DeepSpeed json config file (e.g.,
             `ds_config.json`) or an already loaded json file as a `dict`"
+
+            If enabling any Zero-init, make sure that your model is not initialized until
+            *after* initializing the `TrainingArguments`, else it will not be applied.
+
+
         accelerator_config (`str`, `dict`, or `AcceleratorConfig`, *optional*):
             Config to be used with the internal `Accelerator` implementation. The value is either a location of
             accelerator json config file (e.g., `accelerator_config.json`), an already loaded json file as `dict`,
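The ordering requirement this docstring adds is easiest to see in a short sketch. A minimal example, assuming a `ds_config.json` that sets `"zero_optimization": {"stage": 3}`:

    # Creating TrainingArguments first registers the DeepSpeed config globally,
    # so from_pretrained() below can detect ZeRO-3 and shard the model at init.
    from transformers import AutoModelForCausalLM, TrainingArguments

    args = TrainingArguments(output_dir="out", deepspeed="ds_config.json")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

Reversing the two statements silently skips `zero.init()`, which is the failure mode the patches below try to detect.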
From 02348331baea70aaedce3b76c18481b2b441fe2d Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Tue, 26 Mar 2024 12:51:50 -0400
Subject: [PATCH 2/8] Check for accelerate

---
 src/transformers/integrations/deepspeed.py | 18 +++++++++++++++---
 src/transformers/modeling_utils.py         | 20 ++++++++++++++++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py
index b0db718dba016b..eb219eb5e8f49c 100644
--- a/src/transformers/integrations/deepspeed.py
+++ b/src/transformers/integrations/deepspeed.py
@@ -17,6 +17,7 @@
 import copy
 import importlib.metadata as importlib_metadata
 import importlib.util
+import os
 import weakref
 from functools import partialmethod
 
@@ -282,11 +283,22 @@ def unset_hf_deepspeed_config():
     _hf_deepspeed_config_weak_ref = None
 
 
-def is_deepspeed_zero3_enabled():
+def is_deepspeed_zero3_enabled(check_accelerate=False):
+    """
+    If `check_accelerate`, will also check if `deepspeed_zero3` has been enabled through
+    the `HfDeepSpeedConfig` and if it was enabled through the environment variables
+    from `accelerate launch`.
+    """
+    accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
+    accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
     if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
-        return _hf_deepspeed_config_weak_ref().is_zero3()
+        return _hf_deepspeed_config_weak_ref().is_zero3(), True
+    # This only gets triggered passively if the user launches code with a configured
+    # `accelerate launch`
+    elif check_accelerate and accelerate_zero_stage != -1 and accelerate_zero_init != "0":
+        return True, False
     else:
-        return False
+        return False, True
 
 
 def deepspeed_config():
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 263ae5d2f988cf..f543f31cebe30e 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1312,7 +1312,15 @@ def _from_config(cls, config, **kwargs):
                 torch_dtype=torch_dtype,
             )
 
-        if is_deepspeed_zero3_enabled():
+        deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True)
+
+        if deepspeed_enabled:
+            if not accelerate_enabled:
+                raise ValueError(
+                    "Detected that you want to use `zero-3` Init, but the environment "
+                    "has not been setup yet. Please create `TrainingArguments` before "
+                    "initializing the model."
+                )
             import deepspeed
 
             logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")
@@ -3386,7 +3394,15 @@ def from_pretrained(
         # Instantiate model.
         init_contexts = [no_init_weights(_enable=_fast_init)]
 
-        if is_deepspeed_zero3_enabled() and not is_quantized:
+        deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True)
+
+        if deepspeed_enabled and not is_quantized:
+            if not accelerate_enabled:
+                raise ValueError(
+                    "Detected that you want to use `zero-3` Init, but the environment "
+                    "has not been setup yet. Please create `TrainingArguments` before "
+                    "initializing the model."
+                )
             import deepspeed
 
             logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")

From 125cd16da356321da057228ead3f5b9d74b1bfb9 Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Tue, 26 Mar 2024 12:53:47 -0400
Subject: [PATCH 3/8] Change conditional return

---
 src/transformers/integrations/deepspeed.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py
index eb219eb5e8f49c..21cca3e4173643 100644
--- a/src/transformers/integrations/deepspeed.py
+++ b/src/transformers/integrations/deepspeed.py
@@ -292,13 +292,13 @@ def is_deepspeed_zero3_enabled(check_accelerate=False):
     accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
     accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
     if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
-        return _hf_deepspeed_config_weak_ref().is_zero3(), True
+        return _hf_deepspeed_config_weak_ref().is_zero3()
     # This only gets triggered passively if the user launches code with a configured
-    # `accelerate launch`
+    # `accelerate launch` without making `TrainingArguments`
     elif check_accelerate and accelerate_zero_stage != -1 and accelerate_zero_init != "0":
         return True, False
     else:
-        return False, True
+        return False
 
 
 def deepspeed_config():
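Patches 2 and 3 make the check consult environment variables that `accelerate launch` exports, so a ZeRO-3 launch can be detected even when no `TrainingArguments` was ever constructed. A sketch of that passive path against the code as of PATCH 3; the exact exported values are an assumption, since the check only needs the stage variable to be set and the init variable to be anything other than "0":

    import os

    # Simulate the environment a DeepSpeed-configured `accelerate launch`
    # is assumed to export before user code runs.
    os.environ["ACCELERATE_DEEPSPEED_ZERO_STAGE"] = "3"
    os.environ["ACCELERATE_DEEPSPEED_ZERO3_INIT"] = "true"

    from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled

    # On this passive branch the function still returns the pair introduced in
    # PATCH 2; the other two branches return a plain bool again after PATCH 3,
    # so only this branch can be tuple-unpacked.
    deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True)
    assert (deepspeed_enabled, accelerate_enabled) == (True, False)

That mixed return shape, a tuple on one branch and a bool on the others, is what PATCH 5 below untangles into two separate helpers.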
""" accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1)) accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0") From e455bc61bc8ba95c902ca60dd50fa587f6fa349f Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Thu, 28 Mar 2024 12:53:16 -0400 Subject: [PATCH 5/8] Add new accelerate-specific zero3 check --- src/transformers/integrations/deepspeed.py | 20 +++++++++----------- src/transformers/modeling_utils.py | 14 +++++--------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py index 9dbcf2187e1062..abeebe7eb1674c 100644 --- a/src/transformers/integrations/deepspeed.py +++ b/src/transformers/integrations/deepspeed.py @@ -283,21 +283,19 @@ def unset_hf_deepspeed_config(): _hf_deepspeed_config_weak_ref = None -def is_deepspeed_zero3_enabled(check_accelerate=False): - """ - If `check_accelerate`, will also check if `deepspeed_zero3` has been enabled through - the environment variables setup during `accelerate launch`. - """ - accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1)) - accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0") +def is_deepspeed_zero3_enabled(): if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None: return _hf_deepspeed_config_weak_ref().is_zero3() - # This only gets triggered passively if the user launches code with a configured - # `accelerate launch` without making `TrainingArguments` - elif check_accelerate and accelerate_zero_stage != -1 and accelerate_zero_init != "0": - return True, False else: return False + +def is_deepspeed_zero3_enabled_accelerate(): + "Same as `is_deepspeed_zero3_enabled`, but checks only for accelerate setups" + accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1)) + accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0") + # This only gets triggered passively if the user launches code with a configured + # `accelerate launch` without making `TrainingArguments` + return accelerate_zero_stage != -1 and accelerate_zero_init != "0" def deepspeed_config(): diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index f543f31cebe30e..8e16adcaac513a 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -42,7 +42,7 @@ from .configuration_utils import PretrainedConfig from .dynamic_module_utils import custom_object_save from .generation import GenerationConfig, GenerationMixin -from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled +from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled, is_deepspeed_zero3_enabled_accelerate from .pytorch_utils import ( # noqa: F401 Conv1D, apply_chunking_to_forward, @@ -1312,10 +1312,8 @@ def _from_config(cls, config, **kwargs): torch_dtype=torch_dtype, ) - deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True) - - if deepspeed_enabled: - if not accelerate_enabled: + if is_deepspeed_zero3_enabled(): + if not is_deepspeed_zero3_enabled_accelerate(): raise ValueError( "Detected that you want to use `zero-3` Init, but the environment " "has not been setup yet. Please create `TrainingArguments` before " @@ -3394,10 +3392,8 @@ def from_pretrained( # Instantiate model. 
From e455bc61bc8ba95c902ca60dd50fa587f6fa349f Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Thu, 28 Mar 2024 12:53:16 -0400
Subject: [PATCH 5/8] Add new accelerate-specific zero3 check

---
 src/transformers/integrations/deepspeed.py | 20 +++++++++-----------
 src/transformers/modeling_utils.py         | 14 +++++---------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py
index 9dbcf2187e1062..abeebe7eb1674c 100644
--- a/src/transformers/integrations/deepspeed.py
+++ b/src/transformers/integrations/deepspeed.py
@@ -283,21 +283,19 @@ def unset_hf_deepspeed_config():
     _hf_deepspeed_config_weak_ref = None
 
 
-def is_deepspeed_zero3_enabled(check_accelerate=False):
-    """
-    If `check_accelerate`, will also check if `deepspeed_zero3` has been enabled through
-    the environment variables setup during `accelerate launch`.
-    """
-    accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
-    accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
+def is_deepspeed_zero3_enabled():
     if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
         return _hf_deepspeed_config_weak_ref().is_zero3()
-    # This only gets triggered passively if the user launches code with a configured
-    # `accelerate launch` without making `TrainingArguments`
-    elif check_accelerate and accelerate_zero_stage != -1 and accelerate_zero_init != "0":
-        return True, False
     else:
         return False
+
+def is_deepspeed_zero3_enabled_accelerate():
+    "Same as `is_deepspeed_zero3_enabled`, but checks only for accelerate setups"
+    accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
+    accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
+    # This only gets triggered passively if the user launches code with a configured
+    # `accelerate launch` without making `TrainingArguments`
+    return accelerate_zero_stage != -1 and accelerate_zero_init != "0"
 
 
 def deepspeed_config():
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index f543f31cebe30e..8e16adcaac513a 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -42,7 +42,7 @@
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
 from .generation import GenerationConfig, GenerationMixin
-from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled
+from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled, is_deepspeed_zero3_enabled_accelerate
 from .pytorch_utils import (  # noqa: F401
     Conv1D,
     apply_chunking_to_forward,
@@ -1312,10 +1312,8 @@ def _from_config(cls, config, **kwargs):
                 torch_dtype=torch_dtype,
             )
 
-        deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True)
-
-        if deepspeed_enabled:
-            if not accelerate_enabled:
+        if is_deepspeed_zero3_enabled():
+            if not is_deepspeed_zero3_enabled_accelerate():
                 raise ValueError(
                     "Detected that you want to use `zero-3` Init, but the environment "
                     "has not been setup yet. Please create `TrainingArguments` before "
                     "initializing the model."
                 )
@@ -3394,10 +3392,8 @@ def from_pretrained(
         # Instantiate model.
         init_contexts = [no_init_weights(_enable=_fast_init)]
 
-        deepspeed_enabled, accelerate_enabled = is_deepspeed_zero3_enabled(check_accelerate=True)
-
-        if deepspeed_enabled and not is_quantized:
-            if not accelerate_enabled:
+        if is_deepspeed_zero3_enabled() and not is_quantized:
+            if not is_deepspeed_zero3_enabled_accelerate():
                 raise ValueError(
                     "Detected that you want to use `zero-3` Init, but the environment "
                     "has not been setup yet. Please create `TrainingArguments` before "
                     "initializing the model."
                 )
             import deepspeed
 
             logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")
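After the split, each helper has a single boolean contract: `is_deepspeed_zero3_enabled` reads the live `HfDeepSpeedConfig` weakref, while the new probe only inspects the accelerate environment and can therefore run before any DeepSpeed config object exists. A sketch against the code as of PATCH 5 (this helper is deleted again in PATCH 7, so it never shipped in a release):

    import os

    from transformers.integrations.deepspeed import (
        is_deepspeed_zero3_enabled,
        is_deepspeed_zero3_enabled_accelerate,
    )

    # With no DeepSpeed config reference and no accelerate variables set,
    # both probes report False.
    os.environ.pop("ACCELERATE_DEEPSPEED_ZERO_STAGE", None)
    os.environ.pop("ACCELERATE_DEEPSPEED_ZERO3_INIT", None)
    assert not is_deepspeed_zero3_enabled()
    assert not is_deepspeed_zero3_enabled_accelerate()

    # A DeepSpeed-configured `accelerate launch` flips only the second probe.
    os.environ["ACCELERATE_DEEPSPEED_ZERO_STAGE"] = "3"
    os.environ["ACCELERATE_DEEPSPEED_ZERO3_INIT"] = "true"
    assert is_deepspeed_zero3_enabled_accelerate()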
From 93ff7e70eead84ad8b418af92d7a42a996c8370a Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Thu, 28 Mar 2024 13:06:29 -0400
Subject: [PATCH 6/8] Fix import

---
 src/transformers/integrations/__init__.py  | 2 ++
 src/transformers/integrations/deepspeed.py | 9 +++++++--
 src/transformers/modeling_utils.py         | 7 ++++++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/transformers/integrations/__init__.py b/src/transformers/integrations/__init__.py
index 0dc2975aa963e1..ec663b4e700630 100644
--- a/src/transformers/integrations/__init__.py
+++ b/src/transformers/integrations/__init__.py
@@ -39,6 +39,7 @@
         "deepspeed_optim_sched",
         "is_deepspeed_available",
         "is_deepspeed_zero3_enabled",
+        "is_deepspeed_zero3_enabled_accelerate",
         "set_hf_deepspeed_config",
         "unset_hf_deepspeed_config",
     ],
@@ -108,6 +109,7 @@
         deepspeed_optim_sched,
         is_deepspeed_available,
         is_deepspeed_zero3_enabled,
+        is_deepspeed_zero3_enabled_accelerate,
         set_hf_deepspeed_config,
         unset_hf_deepspeed_config,
     )
diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py
index abeebe7eb1674c..cfcc5a1a28e79c 100644
--- a/src/transformers/integrations/deepspeed.py
+++ b/src/transformers/integrations/deepspeed.py
@@ -288,9 +288,14 @@ def is_deepspeed_zero3_enabled():
         return _hf_deepspeed_config_weak_ref().is_zero3()
     else:
         return False
-
+
+
 def is_deepspeed_zero3_enabled_accelerate():
-    "Same as `is_deepspeed_zero3_enabled`, but checks only for accelerate setups"
+    """
+    Will check if the user has enabled ZeRO 3 with accelerate. This is useful
+    when checking if the environment is configured for it before
+    the deepspeed config reference has been made.
+    """
     accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
     accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
     # This only gets triggered passively if the user launches code with a configured
     # `accelerate launch` without making `TrainingArguments`
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 8e16adcaac513a..528eb3e46b0769 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -42,7 +42,12 @@
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
 from .generation import GenerationConfig, GenerationMixin
-from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled, is_deepspeed_zero3_enabled_accelerate
+from .integrations import (
+    PeftAdapterMixin,
+    deepspeed_config,
+    is_deepspeed_zero3_enabled,
+    is_deepspeed_zero3_enabled_accelerate,
+)
 from .pytorch_utils import (  # noqa: F401
     Conv1D,
     apply_chunking_to_forward,
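PATCH 6 also registers the helper in the lazy `_import_structure` of `transformers.integrations`, so the shorter import path resolves to the same function object as the direct module import. A sketch, valid only on this branch since PATCH 7 removes the name again:

    from transformers.integrations import is_deepspeed_zero3_enabled_accelerate
    from transformers.integrations.deepspeed import (
        is_deepspeed_zero3_enabled_accelerate as direct_import,
    )

    # The lazy module loads integrations.deepspeed on first attribute access,
    # so both names point at one function.
    assert is_deepspeed_zero3_enabled_accelerate is direct_import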
From 5c2370b41983be5fe9bcb0aec8b0e18f4d8a5ef6 Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Tue, 2 Apr 2024 09:10:58 -0400
Subject: [PATCH 7/8] Revert to RTFM

---
 src/transformers/integrations/__init__.py  |  2 --
 src/transformers/integrations/deepspeed.py | 14 --------------
 src/transformers/modeling_utils.py         | 13 -------------
 3 files changed, 29 deletions(-)

diff --git a/src/transformers/integrations/__init__.py b/src/transformers/integrations/__init__.py
index ec663b4e700630..0dc2975aa963e1 100644
--- a/src/transformers/integrations/__init__.py
+++ b/src/transformers/integrations/__init__.py
@@ -39,7 +39,6 @@
         "deepspeed_optim_sched",
         "is_deepspeed_available",
         "is_deepspeed_zero3_enabled",
-        "is_deepspeed_zero3_enabled_accelerate",
         "set_hf_deepspeed_config",
         "unset_hf_deepspeed_config",
     ],
@@ -109,7 +108,6 @@
         deepspeed_optim_sched,
         is_deepspeed_available,
         is_deepspeed_zero3_enabled,
-        is_deepspeed_zero3_enabled_accelerate,
         set_hf_deepspeed_config,
         unset_hf_deepspeed_config,
     )
diff --git a/src/transformers/integrations/deepspeed.py b/src/transformers/integrations/deepspeed.py
index cfcc5a1a28e79c..b0db718dba016b 100644
--- a/src/transformers/integrations/deepspeed.py
+++ b/src/transformers/integrations/deepspeed.py
@@ -17,7 +17,6 @@
 import copy
 import importlib.metadata as importlib_metadata
 import importlib.util
-import os
 import weakref
 from functools import partialmethod
 
@@ -290,19 +289,6 @@ def is_deepspeed_zero3_enabled():
         return False
 
 
-def is_deepspeed_zero3_enabled_accelerate():
-    """
-    Will check if the user has enabled ZeRO 3 with accelerate. This is useful
-    when checking if the environment is configured for it before
-    the deepspeed config reference has been made.
-    """
-    accelerate_zero_stage = int(os.environ.get("ACCELERATE_DEEPSPEED_ZERO_STAGE", -1))
-    accelerate_zero_init = os.environ.get("ACCELERATE_DEEPSPEED_ZERO3_INIT", "0")
-    # This only gets triggered passively if the user launches code with a configured
-    # `accelerate launch` without making `TrainingArguments`
-    return accelerate_zero_stage != -1 and accelerate_zero_init != "0"
-
-
 def deepspeed_config():
     if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
         return _hf_deepspeed_config_weak_ref().config
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 528eb3e46b0769..7f9c47f2d235b0 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -46,7 +46,6 @@
     PeftAdapterMixin,
     deepspeed_config,
     is_deepspeed_zero3_enabled,
-    is_deepspeed_zero3_enabled_accelerate,
 )
 from .pytorch_utils import (  # noqa: F401
     Conv1D,
@@ -1318,12 +1317,6 @@ def _from_config(cls, config, **kwargs):
             )
 
         if is_deepspeed_zero3_enabled():
-            if not is_deepspeed_zero3_enabled_accelerate():
-                raise ValueError(
-                    "Detected that you want to use `zero-3` Init, but the environment "
-                    "has not been setup yet. Please create `TrainingArguments` before "
-                    "initializing the model."
-                )
             import deepspeed
 
             logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")
@@ -3398,12 +3391,6 @@ def from_pretrained(
         init_contexts = [no_init_weights(_enable=_fast_init)]
 
         if is_deepspeed_zero3_enabled() and not is_quantized:
-            if not is_deepspeed_zero3_enabled_accelerate():
-                raise ValueError(
-                    "Detected that you want to use `zero-3` Init, but the environment "
-                    "has not been setup yet. Please create `TrainingArguments` before "
-                    "initializing the model."
-                )
             import deepspeed
 
             logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")

From 1740e29360857edccb2ebf5965bef0a1c81aa512 Mon Sep 17 00:00:00 2001
From: Zach Mueller
Date: Wed, 3 Apr 2024 10:10:19 -0400
Subject: [PATCH 8/8] Update src/transformers/modeling_utils.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
---
 src/transformers/modeling_utils.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 7f9c47f2d235b0..263ae5d2f988cf 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -42,11 +42,7 @@
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
 from .generation import GenerationConfig, GenerationMixin
-from .integrations import (
-    PeftAdapterMixin,
-    deepspeed_config,
-    is_deepspeed_zero3_enabled,
-)
+from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled
 from .pytorch_utils import (  # noqa: F401
     Conv1D,
     apply_chunking_to_forward,
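With the guard reverted, the series ends where the PATCH 1 docstring began: `zero.init()` only activates when a live DeepSpeed config reference exists before the model is built. With `Trainer`, constructing `TrainingArguments` first is enough; outside of it, a minimal sketch of holding that reference by hand, assuming deepspeed is installed (the config dict is a stub with only the field this check reads, not a complete DeepSpeed config):

    from transformers import AutoModel
    from transformers.integrations import HfDeepSpeedConfig

    ds_config = {"zero_optimization": {"stage": 3}, "train_micro_batch_size_per_gpu": 1}  # stub
    dschf = HfDeepSpeedConfig(ds_config)  # keep this object alive; the check holds only a weakref
    model = AutoModel.from_pretrained("bert-base-uncased")  # created under deepspeed.zero.Init()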