From a52270657e439b9d3cc35f2bb2ae26ad1ff44407 Mon Sep 17 00:00:00 2001
From: "Lin, Fanli"
Date: Mon, 9 Sep 2024 04:10:47 -0400
Subject: [PATCH 1/3] update code

---
 tests/slow/test_dpo_slow.py |  9 ++++++---
 tests/slow/test_sft_slow.py |  4 ++--
 tests/test_ppo_trainer.py   |  4 ++--
 tests/testing_utils.py      | 21 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
index 53d6868bd7..f9fea13191 100644
--- a/tests/slow/test_dpo_slow.py
+++ b/tests/slow/test_dpo_slow.py
@@ -24,7 +24,7 @@
 
 from trl import DPOConfig, DPOTrainer, is_peft_available
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_torch_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, torch_device
 from .testing_constants import DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST
 
 
@@ -32,7 +32,7 @@
     from peft import LoraConfig, PeftModel
 
 
-@require_torch_gpu
+@require_non_cpu
 class DPOTrainerSlowTester(unittest.TestCase):
     def setUp(self):
         self.dataset = load_dataset("trl-internal-testing/mlabonne-chatml-dpo-pairs-copy", split="train[:10%]")
@@ -47,7 +47,10 @@ def setUp(self):
 
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        if torch_device == "cuda":
+            torch.cuda.empty_cache()
+        elif torch_device == "xpu":
+            torch.xpu.empty_cache()
         gc.collect()
 
     @parameterized.expand(list(itertools.product(MODELS_TO_TEST, DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS)))
diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index 3151db64f9..a6bd990a62 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -25,7 +25,7 @@
 from trl import SFTConfig, SFTTrainer, is_peft_available
 from trl.models.utils import setup_chat_format
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_torch_gpu, require_torch_multi_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_torch_multi_gpu
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
 
@@ -33,7 +33,7 @@
     from peft import LoraConfig, PeftModel
 
 
-@require_torch_gpu
+@require_non_cpu
 class SFTTrainerSlowTester(unittest.TestCase):
     def setUp(self):
         self.train_dataset = load_dataset("imdb", split="train[:10%]")
diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py
index eb2764420c..425815a51a 100644
--- a/tests/test_ppo_trainer.py
+++ b/tests/test_ppo_trainer.py
@@ -30,7 +30,7 @@
 from trl.core import respond_to_batch
 
 from .testing_constants import CI_HUB_ENDPOINT, CI_HUB_USER
-from .testing_utils import require_peft, require_torch_multi_gpu
+from .testing_utils import require_peft, require_multi_accelerator
 
 
 EXPECTED_STATS = [
@@ -1038,7 +1038,7 @@ def test_push_to_hub(self):
         )
 
     @require_peft
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_peft_model_ppo_trainer_multi_gpu(self):
         from peft import LoraConfig, get_peft_model
         from transformers import AutoModelForCausalLM
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
index 7033218886..539f01406a 100644
--- a/tests/testing_utils.py
+++ b/tests/testing_utils.py
@@ -14,6 +14,7 @@
 import unittest
 
 import torch
+from accelerate.test_utils.testing import get_backend
 
 from trl import (
     is_bitsandbytes_available,
@@ -24,6 +25,8 @@
     is_xpu_available,
 )
 
+torch_device, device_count, memory_allocated_func = get_backend()
+
 
 def require_peft(test_case):
     """
@@ -105,3 +108,21 @@ def require_torch_multi_xpu(test_case):
     if torch.xpu.device_count() < 2 and is_xpu_available():
         test_case = unittest.skip("test requires multiple XPUs")(test_case)
     return test_case
+
+
+def require_non_cpu(test_case):
+    """
+    Decorator marking a test that requires a hardware accelerator backend. These tests are skipped when there are no
+    hardware accelerators available.
+    """
+    return unittest.skipUnless(torch_device != "cpu", "test requires a hardware accelerator")(test_case)
+
+
+def require_multi_accelerator(test_case):
+    """
+    Decorator marking a test that requires multiple hardware accelerators. These tests are skipped on a machine without
+    multiple accelerators.
+    """
+    return unittest.skipUnless(
+        torch_device != "cpu" and device_count > 1, "test requires multiple hardware accelerators"
+    )(test_case)

From 5b4e65890daffaa8402f673fc5410a56a08c26ba Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Mon, 9 Sep 2024 07:37:16 -0700
Subject: [PATCH 2/3] update

---
 tests/slow/test_sft_slow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index a6bd990a62..e2269a57a8 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -25,7 +25,7 @@
 from trl import SFTConfig, SFTTrainer, is_peft_available
 from trl.models.utils import setup_chat_format
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_torch_multi_gpu
+from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, require_multi_accelerator
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
 
@@ -264,7 +264,7 @@ def test_sft_trainer_transformers_mp_gc_peft(self, model_name, packing, gradient
     @parameterized.expand(
         list(itertools.product(MODELS_TO_TEST, PACKING_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, DEVICE_MAP_OPTIONS))
     )
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_sft_trainer_transformers_mp_gc_device_map(
         self, model_name, packing, gradient_checkpointing_kwargs, device_map
     ):

From e0e1eeb59a96cdc058826c8cdd28b1e23f3b4c65 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Thu, 12 Sep 2024 00:47:45 -0700
Subject: [PATCH 3/3] fix style

---
 tests/slow/test_dpo_slow.py | 2 +-
 tests/slow/test_sft_slow.py | 6 ++----
 tests/test_ppo_trainer.py   | 2 +-
 tests/testing_utils.py      | 5 +++--
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/tests/slow/test_dpo_slow.py b/tests/slow/test_dpo_slow.py
index f9fea13191..11da05fa31 100644
--- a/tests/slow/test_dpo_slow.py
+++ b/tests/slow/test_dpo_slow.py
@@ -24,7 +24,7 @@
 
 from trl import DPOConfig, DPOTrainer, is_peft_available
 
-from ..testing_utils import require_bitsandbytes, require_peft, require_non_cpu, torch_device
+from ..testing_utils import require_bitsandbytes, require_non_cpu, require_peft, torch_device
 from .testing_constants import DPO_LOSS_TYPES, DPO_PRECOMPUTE_LOGITS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST
 
 
diff --git a/tests/slow/test_sft_slow.py b/tests/slow/test_sft_slow.py
index ffb72bdee3..bb429d7155 100644
--- a/tests/slow/test_sft_slow.py
+++ b/tests/slow/test_sft_slow.py
@@ -28,11 +28,9 @@
 from ..testing_utils import (
     require_bitsandbytes,
     require_liger_kernel,
-    require_peft,
-    require_torch_gpu,
-    require_non_cpu,
     require_multi_accelerator,
-    require_torch_multi_gpu,
+    require_non_cpu,
+    require_peft,
 )
 from .testing_constants import DEVICE_MAP_OPTIONS, GRADIENT_CHECKPOINTING_KWARGS, MODELS_TO_TEST, PACKING_OPTIONS
 
diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py
index 425815a51a..517aa19cca 100644
--- a/tests/test_ppo_trainer.py
+++ b/tests/test_ppo_trainer.py
@@ -30,7 +30,7 @@
 from trl.core import respond_to_batch
 
 from .testing_constants import CI_HUB_ENDPOINT, CI_HUB_USER
-from .testing_utils import require_peft, require_multi_accelerator
+from .testing_utils import require_multi_accelerator, require_peft
 
 
 EXPECTED_STATS = [
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
index 68c632b587..4b41b28a55 100644
--- a/tests/testing_utils.py
+++ b/tests/testing_utils.py
@@ -26,6 +26,7 @@
     is_xpu_available,
 )
 
+
 torch_device, device_count, memory_allocated_func = get_backend()
 
 
@@ -118,8 +119,8 @@ def require_liger_kernel(test_case):
     if not (torch.cuda.is_available() and is_liger_available()):
         test_case = unittest.skip("test requires GPU and liger-kernel")(test_case)
     return test_case
-
-
+
+
 def require_non_cpu(test_case):
     """
     Decorator marking a test that requires a hardware accelerator backend. These tests are skipped when there are no