From a52270657e439b9d3cc35f2bb2ae26ad1ff44407 Mon Sep 17 00:00:00 2001
From: "Lin, Fanli"
Date: Mon, 9 Sep 2024 04:10:47 -0400
Subject: [PATCH 1/3] update code

---
 tests/slow/test_dpo_slow.py |  9 ++++++---
 tests/slow/test_sft_slow.py |  4 ++--
 tests/test_ppo_trainer.py   |  4 ++--
 tests/testing_utils.py      | 21 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
index 53d6868bd7..f9fea13191 100644
--- a/tests/slow/test_dpo_slow.py
+++ b/tests/slow/test_dpo_slow.py
@@ -24,7 +24,7 @@
 
 from trl import DPOConfig, DPOTrainer, is_peft_available
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_torch_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, torch_device
 from .testing_constants import DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST
 
 
@@ -32,7 +32,7 @@
     from peft import LoraConfig, PeftModel
 
 
-@require_torch_gpu
+@require_non_cpu
 class DPOTrainerSlowTester(unittest.TestCase):
     def setUp(self):
         self.dataset = load_dataset("trl-internal-testing/mlabonne-chatml-dpo-pairs-copy", split="train[:10%]")
@@ -47,7 +47,10 @@ def setUp(self):
 
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        if torch_device == "cuda":
+            torch.cuda.empty_cache()
+        elif torch_device == "xpu":
+            torch.xpu.empty_cache()
         gc.collect()
 
     @parameterized.expand(list(itertools.product(MODELS_TO_TEST, DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS)))
diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index 3151db64f9..a6bd990a62 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -25,7 +25,7 @@
 from trl import SFTConfig, SFTTrainer, is_peft_available
 from trl.models.utils import setup_chat_format
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_torch_gpu, require_torch_multi_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_torch_multi_gpu
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
 
@@ -33,7 +33,7 @@
     from peft import LoraConfig, PeftModel
 
 
-@require_torch_gpu
+@require_non_cpu
 class SFTTrainerSlowTester(unittest.TestCase):
     def setUp(self):
         self.train_dataset = load_dataset("imdb", split="train[:10%]")
diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py
index eb2764420c..425815a51a 100644
--- a/tests/test_ppo_trainer.py
+++ b/tests/test_ppo_trainer.py
@@ -30,7 +30,7 @@
 from trl.core import respond_to_batch
 
 from .testing_constants import CI_HUB_ENDPOINT, CI_HUB_USER
-from .testing_utils import require_peft, require_torch_multi_gpu
+from .testing_utils import require_peft, require_multi_accelerator
 
 
 EXPECTED_STATS = [
@@ -1038,7 +1038,7 @@ def test_push_to_hub(self):
         )
 
     @require_peft
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_peft_model_ppo_trainer_multi_gpu(self):
         from peft import LoraConfig, get_peft_model
         from transformers import AutoModelForCausalLM
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
index 7033218886..539f01406a 100644
--- a/tests/testing_utils.py
+++ b/tests/testing_utils.py
@@ -14,6 +14,7 @@
 import unittest
 
 import torch
+from accelerate.test_utils.testing import get_backend
 
 from trl import (
     is_bitsandbytes_available,
@@ -24,6 +25,8 @@
     is_xpu_available,
 )
 
+torch_device, device_count, memory_allocated_func = get_backend()
+
 
 def require_peft(test_case):
     """
@@ -105,3 +108,21 @@ def require_torch_multi_xpu(test_case):
     if torch.xpu.device_count() < 2 and is_xpu_available():
         test_case = unittest.skip("test requires multiple XPUs")(test_case)
     return test_case
+
+
+def require_non_cpu(test_case):
+    """
+    Decorator marking a test that requires a hardware accelerator backend. These tests are skipped when there are no
+    hardware accelerators available.
+    """
+    return unittest.skipUnless(torch_device != "cpu", "test requires a hardware accelerator")(test_case)
+
+
+def require_multi_accelerator(test_case):
+    """
+    Decorator marking a test that requires multiple hardware accelerators. These tests are skipped on a machine without
+    multiple accelerators.
+    """
+    return unittest.skipUnless(
+        torch_device != "cpu" and device_count > 1, "test requires multiple hardware accelerators"
+    )(test_case)

From 5b4e65890daffaa8402f673fc5410a56a08c26ba Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Mon, 9 Sep 2024 07:37:16 -0700
Subject: [PATCH 2/3] update

---
 tests/slow/test_sft_slow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index a6bd990a62..e2269a57a8 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -25,7 +25,7 @@
 from trl import SFTConfig, SFTTrainer, is_peft_available
 from trl.models.utils import setup_chat_format
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_torch_multi_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_multi_accelerator
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
 
@@ -264,7 +264,7 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient
     @parameterized.expand(
         list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, DEVICE_MAP_OPTIONS))
     )
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_sft_trainer_transformers_mp_gc_device_map(
         self, model_name, packing, gradient_checkpointing_kwargs, device_map
     ):

From e0e1eeb59a96cdc058826c8cdd28b1e23f3b4c65 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Thu, 12 Sep 2024 00:47:45 -0700
Subject: [PATCH 3/3] fix style

---
 tests/slow/test_dpo_slow.py | 2 +-
 tests/slow/test_sft_slow.py | 6 ++----
 tests/test_ppo_trainer.py   | 2 +-
 tests/testing_utils.py      | 5 +++--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
index f9fea13191..11da05fa31 100644
--- a/tests/slow/test_dpo_slow.py
+++ b/tests/slow/test_dpo_slow.py
@@ -24,7 +24,7 @@
 
 from trl import DPOConfig, DPOTrainer, is_peft_available
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, torch_device
+from ..testing_utils import require_bitsandbytes, require_non_cpu, require_peft, torch_device
 from .testing_constants import DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST
 
 
diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index ffb72bdee3..bb429d7155 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -28,11 +28,9 @@
 from ..testing_utils import (
     require_bitsandbytes,
     require_liger_kernel,
-    require_peft,
-    require_torch_gpu,
-    require_non_cpu,
     require_multi_accelerator,
-    require_torch_multi_gpu,
+    require_non_cpu,
+    require_peft,
 )
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py
index 425815a51a..517aa19cca 100644
--- a/tests/test_ppo_trainer.py
+++ b/tests/test_ppo_trainer.py
@@ -30,7 +30,7 @@
 from trl.core import respond_to_batch
 
 from .testing_constants import CI_HUB_ENDPOINT, CI_HUB_USER
-from .testing_utils import require_peft, require_multi_accelerator
+from .testing_utils import require_multi_accelerator, require_peft
 
 
 EXPECTED_STATS = [
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
index 68c632b587..4b41b28a55 100644
--- a/tests/testing_utils.py
+++ b/tests/testing_utils.py
@@ -26,6 +26,7 @@
     is_xpu_available,
 )
 
+
 torch_device, device_count, memory_allocated_func = get_backend()
 
 
@@ -118,8 +119,8 @@ def require_liger_kernel(test_case):
     if not (torch.cuda.is_available() and is_liger_available()):
         test_case = unittest.skip("test requires GPU and liger-kernel")(test_case)
     return test_case
-
-
+
+
 def require_non_cpu(test_case):
     """
     Decorator marking a test that requires a hardware accelerator backend. These tests are skipped when there are no