From be3b2c13f007a6f08f4385179b20540bd95625a2 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 16:02:16 +0200
Subject: [PATCH 1/8] Update min version of accelerate to 0.26.0

---
 setup.py                                      | 2 +-
 src/transformers/dependency_versions_table.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index e79cd46197779e..e49b40ac971581 100644
--- a/setup.py
+++ b/setup.py
@@ -96,7 +96,7 @@
 # 2. once modified, run: `make deps_table_update` to update src/transformers/dependency_versions_table.py
 _deps = [
     "Pillow>=10.0.1,<=15.0",
-    "accelerate>=0.21.0",
+    "accelerate>=0.26.0",
     "av==9.2.0",  # Latest version of PyAV (10.0.0) has issues with audio stream.
     "beautifulsoup4",
     "codecarbon==1.2.0",
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index 7644d8d68d1696..68e76612887924 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -3,7 +3,7 @@
 # 2. run `make deps_table_update``
 deps = {
     "Pillow": "Pillow>=10.0.1,<=15.0",
-    "accelerate": "accelerate>=0.21.0",
+    "accelerate": "accelerate>=0.26.0",
     "av": "av==9.2.0",
     "beautifulsoup4": "beautifulsoup4",
     "codecarbon": "codecarbon==1.2.0",

From 6a1f969739782c034d363421941e18136cf5f242 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:44:12 +0200
Subject: [PATCH 2/8] dev-ci

From bce434b78bef0fddabcb09a4f31893ec32cb462f Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:52:48 +0200
Subject: [PATCH 3/8] update min version in import

---
 src/transformers/utils/import_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index 9b4be00ee8ddab..ac58b1ece9202e 100755
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -87,7 +87,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
 # This is the version of torch required to run torch.fx features and torch.onnx with dictionary inputs.
 TORCH_FX_REQUIRED_VERSION = version.parse("1.10")
 
-ACCELERATE_MIN_VERSION = "0.21.0"
+ACCELERATE_MIN_VERSION = "0.26.0"
 FSDP_MIN_VERSION = "1.12.0"
 XLA_FSDPV2_MIN_VERSION = "2.2.0"
 

From 2fa98fa45987752443b13f43b2d0f6e8159de748 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:56:47 +0200
Subject: [PATCH 4/8] remove useless check

---
 src/transformers/trainer.py       | 17 ++++++++---------
 src/transformers/training_args.py |  6 +-----
 tests/fsdp/test_fsdp.py           |  6 +-----
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 85d31be09a6c63..1ac8782a0ed2df 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -4671,16 +4671,15 @@ def create_accelerator_and_postprocess(self):
             fsdp_plugin.limit_all_gathers = self.args.fsdp_config.get(
                 "limit_all_gathers", fsdp_plugin.limit_all_gathers
             )
-            if is_accelerate_available("0.23.0"):
-                fsdp_plugin.activation_checkpointing = self.args.fsdp_config.get(
-                    "activation_checkpointing", fsdp_plugin.activation_checkpointing
+            fsdp_plugin.activation_checkpointing = self.args.fsdp_config.get(
+                "activation_checkpointing", fsdp_plugin.activation_checkpointing
+            )
+            if fsdp_plugin.activation_checkpointing and self.args.gradient_checkpointing:
+                raise ValueError(
+                    "The activation_checkpointing in FSDP config and the gradient_checkpointing in training arg "
+                    "can't be set to True simultaneously. Please use FSDP's activation_checkpointing logic "
+                    "when using FSDP."
                 )
-                if fsdp_plugin.activation_checkpointing and self.args.gradient_checkpointing:
-                    raise ValueError(
-                        "The activation_checkpointing in FSDP config and the gradient_checkpointing in training arg "
-                        "can't be set to True simultaneously. Please use FSDP's activation_checkpointing logic "
-                        "when using FSDP."
-                    )
 
         if self.is_deepspeed_enabled and getattr(self.args, "hf_deepspeed_config", None) is None:
             self.propagate_args_to_deepspeed()
diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index 57605fd945c72c..66191fbb9943e9 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -1913,11 +1913,7 @@ def __post_init__(self):
         for fsdp_option in self.fsdp:
             if fsdp_option.upper() in FSDP_SHARDING_STRATEGY:
                 # set environment variable for FSDP sharding strategy
-                os.environ[f"{prefix}SHARDING_STRATEGY"] = (
-                    str(FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1)
-                    if is_accelerate_available("0.26.0")
-                    else fsdp_option.upper()
-                )
+                os.environ[f"{prefix}SHARDING_STRATEGY"] = str(FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1)
             elif fsdp_option == FSDPOption.OFFLOAD:
                 os.environ[f"{prefix}OFFLOAD_PARAMS"] = "true"
             elif fsdp_option == FSDPOption.AUTO_WRAP:
diff --git a/tests/fsdp/test_fsdp.py b/tests/fsdp/test_fsdp.py
index ff5bd851069738..7e14cc8c9e6fc9 100644
--- a/tests/fsdp/test_fsdp.py
+++ b/tests/fsdp/test_fsdp.py
@@ -196,11 +196,7 @@ def test_fsdp_config_transformers_auto_wrap(self, sharding_strategy, dtype):
         self.assertEqual(trainer.args.fsdp[0], sharding_strategy)
         self.assertEqual(trainer.args.fsdp[1], FSDPOption.OFFLOAD)
         self.assertEqual(trainer.args.fsdp[2], FSDPOption.AUTO_WRAP)
-        fsdp_sharding_strategy = (
-            str(FSDP_SHARDING_STRATEGY.index(sharding_strategy.upper()) + 1)
-            if is_accelerate_available("0.26.0")
-            else sharding_strategy.upper()
-        )
+        fsdp_sharding_strategy = str(FSDP_SHARDING_STRATEGY.index(sharding_strategy.upper()) + 1)
         self.assertEqual(os.environ[f"{prefix}SHARDING_STRATEGY"], fsdp_sharding_strategy)
         self.assertEqual(os.environ[f"{prefix}OFFLOAD_PARAMS"], "true")
         self.assertEqual(os.environ[f"{prefix}AUTO_WRAP_POLICY"], "TRANSFORMER_BASED_WRAP")

From e58683408b03a740207a34515afa4a3e8b986c1b Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:57:10 +0200
Subject: [PATCH 5/8] dev-ci

From bddc0d53b016429864bff99a078f1b2b5e25f524 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:57:27 +0200
Subject: [PATCH 6/8] style

---
 src/transformers/training_args.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py
index 66191fbb9943e9..d30a8cf81d7d48 100644
--- a/src/transformers/training_args.py
+++ b/src/transformers/training_args.py
@@ -1913,7 +1913,9 @@ def __post_init__(self):
         for fsdp_option in self.fsdp:
             if fsdp_option.upper() in FSDP_SHARDING_STRATEGY:
                 # set environment variable for FSDP sharding strategy
-                os.environ[f"{prefix}SHARDING_STRATEGY"] = str(FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1)
+                os.environ[f"{prefix}SHARDING_STRATEGY"] = str(
+                    FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1
+                )
             elif fsdp_option == FSDPOption.OFFLOAD:
                 os.environ[f"{prefix}OFFLOAD_PARAMS"] = "true"
             elif fsdp_option == FSDPOption.AUTO_WRAP:

From 6a0ffae5e01c7ef8bfa311a1c91a3e40c27165ab Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 12 Aug 2024 17:57:42 +0200
Subject: [PATCH 7/8] dev-ci

From 1efa1124799026afb6ce8081d56ca9f63c90df5d Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Tue, 13 Aug 2024 15:50:26 +0200
Subject: [PATCH 8/8] dev-ci