[Feature] Enable Intel XPU support #839

Merged · 34 commits · Oct 31, 2023

Changes from 3 commits

Commits (34)
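This PR makes TRL's examples and trainers device-agnostic so they also run on Intel XPU hardware: hard-coded CUDA calls (torch.cuda.empty_cache, torch.cuda.manual_seed_all, device selection) are gated on accelerate.utils.is_xpu_available(), and the PPO option optimize_cuda_cache is renamed to the backend-neutral optimize_device_cache. A minimal check for the XPU path the PR relies on (a sketch, assuming a PyTorch build with Intel XPU support, e.g. via intel-extension-for-pytorch):

import torch
from accelerate.utils import is_xpu_available

if is_xpu_available():
    # torch.xpu mirrors the torch.cuda API for Intel devices
    print(f"XPU devices available: {torch.xpu.device_count()}")
else:
    print("No XPU detected; TRL falls back to the CUDA/CPU paths.")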
f505e00
enable xpu support
abhilash1910 Oct 6, 2023
227f487
fix bug
abhilash1910 Oct 6, 2023
8c644fc
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 6, 2023
ef44042
review commits
abhilash1910 Oct 6, 2023
ae42341
fix style
abhilash1910 Oct 9, 2023
b80112d
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 9, 2023
973bfe4
add xpu decorator
abhilash1910 Oct 9, 2023
e72d3c2
refactor review commit
abhilash1910 Oct 9, 2023
295f87d
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 9, 2023
99f82eb
fix test
abhilash1910 Oct 10, 2023
1752ae4
review commit
abhilash1910 Oct 11, 2023
008c2a6
fix test
abhilash1910 Oct 12, 2023
d6f399b
Update benchmark.yml (#856)
lvwerra Oct 11, 2023
e72cc90
Standardise example scripts (#842)
lewtun Oct 11, 2023
3fce988
Fix version check in import_utils.py (#853)
adampauls Oct 11, 2023
9f4d177
dont use get_peft_model if model is already peft (#857)
abhishekkrthakur Oct 11, 2023
06dfd8e
merge conflict
abhilash1910 Oct 12, 2023
5e923b7
add xpu decorator
abhilash1910 Oct 9, 2023
308d7bf
resolve
abhilash1910 Oct 12, 2023
2c8343a
resolves
abhilash1910 Oct 12, 2023
5805670
upstream
abhilash1910 Oct 12, 2023
0e54bc6
resolve conflicts
abhilash1910 Oct 12, 2023
bea855f
Merge pull request #1 from abhilash1910/main
abhilash1910 Oct 12, 2023
aa518c3
Merge branch 'main' into sycl
abhilash1910 Oct 12, 2023
2c3f999
refactor and precommit
abhilash1910 Oct 12, 2023
b0c8b52
Merge pull request #3 from abhilash1910/main
abhilash1910 Oct 12, 2023
a579504
Merge branch 'main' into sycl
abhilash1910 Oct 12, 2023
fdc0896
fix new tests
abhilash1910 Oct 12, 2023
2200089
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 12, 2023
98956ac
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 17, 2023
9a962fc
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 17, 2023
d6e9e4f
add device mapping for xpu
abhilash1910 Oct 17, 2023
aac92f8
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 20, 2023
8b77310
Merge branch 'huggingface:main' into sycl
abhilash1910 Oct 25, 2023
@@ -9,6 +9,7 @@
from peft import AutoPeftModelForCausalLM, LoraConfig
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
from accelerate.utils import is_xpu_available

from trl import SFTTrainer
from trl.trainer import ConstantLengthDataset
@@ -208,7 +209,10 @@ def create_datasets(tokenizer, args):

# Free memory for merging weights
del base_model
torch.cuda.empty_cache()
if is_xpu_available():
torch.xpu.empty_cache()
else:
torch.cuda.empty_cache()

model = AutoPeftModelForCausalLM.from_pretrained(output_dir, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()
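The same XPU-or-CUDA cache flush recurs at every call site in this PR; a hypothetical helper that factors it out might look like this (a sketch, not part of the diff):

import gc

import torch
from accelerate.utils import is_xpu_available

def flush_device_cache():
    # Release Python-level references first, then the allocator cache.
    gc.collect()
    if is_xpu_available():
        torch.xpu.empty_cache()   # Intel XPU backend
    elif torch.cuda.is_available():
        torch.cuda.empty_cache()  # NVIDIA CUDA backend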
2 changes: 1 addition & 1 deletion examples/research_projects/tools/python_interpreter.py
@@ -152,7 +152,7 @@ def solution():
log_with="wandb",
tracker_project_name="trl-gsm8k",
remove_unused_columns=False,
optimize_cuda_cache=True,
optimize_device_cache=True,
)

ppo_trainer = PPOTrainer(config=ppo_config, model=model, tokenizer=tokenizer, dataset=ds)
2 changes: 1 addition & 1 deletion examples/research_projects/tools/triviaqa.py
@@ -104,7 +104,7 @@ class ScriptArguments:
ppo_epochs=args.ppo_epochs,
gradient_accumulation_steps=args.gradient_accumulation_steps,
seed=args.seed,
optimize_cuda_cache=True,
optimize_device_cache=True,
)
ppo_trainer = PPOTrainer(config=config, model=model, tokenizer=tokenizer)
dataset = load_dataset("trivia_qa", "rc", split="train")
12 changes: 9 additions & 3 deletions examples/research_projects/toxicity/scripts/evaluate-toxicity.py
@@ -7,7 +7,7 @@
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

from accelerate.utils import is_xpu_available

toxicity = evaluate.load("ybelkada/toxicity", "DaNLP/da-electra-hatespeech-detection", module_type="measurement")
ds = load_dataset("OxAISH-AL-LLM/wiki_toxic", split="test")
@@ -50,7 +50,10 @@
output_file = args.output_file
max_new_tokens = args.max_new_tokens
context_length = args.context_length
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
if is_xpu_available():
device = torch.xpu.current_device()
else:
device = torch.cuda.current_device() if torch.cuda.is_available() else "cpu"

# consider only toxic prompts
ds = ds.filter(lambda x: x["label"] == 1)
@@ -116,7 +119,10 @@
print(f"Model: {model_id} - Mean: {mean} - Std: {std}")

model = None
torch.cuda.empty_cache()
if is_xpu_available():
torch.xpu.empty_cache()
else:
torch.cuda.empty_cache()

# close file
file.close()
2 changes: 1 addition & 1 deletion examples/scripts/multi_adapter_rl.py
@@ -76,7 +76,7 @@ def collator(data):
batch_size=8,
mini_batch_size=2,
gradient_accumulation_steps=2,
optimize_cuda_cache=True,
optimize_device_cache=True,
)

ppo_trainer = PPOTrainer(
6 changes: 5 additions & 1 deletion examples/scripts/sentiment_tuning.py
@@ -18,6 +18,7 @@
import torch
import tyro
from accelerate import Accelerator
from accelerate.utils import is_xpu_available
from datasets import load_dataset
from peft import LoraConfig
from tqdm import tqdm
@@ -153,7 +154,10 @@ def collator(data):
# to the same device as the PPOTrainer.
device = ppo_trainer.accelerator.device
if ppo_trainer.accelerator.num_processes == 1:
device = 0 if torch.cuda.is_available() else "cpu" # to avoid a `pipeline` bug
if is_xpu_available():
device = "xpu:0"
else:
device = 0 if torch.cuda.is_available() else "cpu" # to avoid a `pipeline` bug
ds_plugin = ppo_trainer.accelerator.state.deepspeed_plugin
task, model_name = args.ppo_config.reward_model.split(":")
if ds_plugin is not None and ds_plugin.is_zero3_init_enabled():
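transformers pipelines accept the device either as a CUDA ordinal (0), a string such as "xpu:0", or "cpu", which is why the branch above mixes types. A standalone sketch of the same resolution logic (pipeline_device is illustrative, not part of the PR):

import torch
from accelerate.utils import is_xpu_available

def pipeline_device(num_processes: int, accelerator_device):
    # Single process: pick an explicit device to avoid a `pipeline` bug.
    if num_processes == 1:
        if is_xpu_available():
            return "xpu:0"
        return 0 if torch.cuda.is_available() else "cpu"
    # Multi-process: defer to the accelerator's placement.
    return accelerator_device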
4 changes: 3 additions & 1 deletion examples/scripts/stable_diffusion_tuning.py
@@ -22,6 +22,7 @@
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError
from transformers import CLIPModel, CLIPProcessor
from accelerate.utils import is_xpu_available

from trl import DDPOConfig, DDPOTrainer, DefaultDDPOStableDiffusionPipeline

@@ -82,7 +83,8 @@ def aesthetic_scorer(hub_model_id, model_filename):
model_id=hub_model_id,
model_filename=model_filename,
dtype=torch.float32,
).cuda()
)
scorer = scorer.xpu() if is_xpu_available() else scorer.cuda()

def _fn(images, prompts, metadata):
images = (images * 255).round().clamp(0, 255).to(torch.uint8)
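An alternative to the .xpu()/.cuda() conditional above is a single .to(device) call, which also covers the CPU case (a sketch; pick_device is illustrative, not part of the PR):

import torch
from accelerate.utils import is_xpu_available

def pick_device() -> torch.device:
    if is_xpu_available():
        return torch.device("xpu")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")

# e.g. scorer = scorer.to(pick_device())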
24 changes: 17 additions & 7 deletions trl/core.py
@@ -21,7 +21,7 @@
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
from transformers import top_k_top_p_filtering

from accelerate.utils import is_xpu_available

try:
from collections.abc import Mapping
@@ -240,7 +240,10 @@ def set_seed(seed: int):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
if is_xpu_available():
torch.xpu.manual_seed_all(seed)
else:
torch.cuda.manual_seed_all(seed)


class LengthSampler:
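set_seed is now backend-aware, but calling it is unchanged (usage example, assuming trl is importable):

from trl.core import set_seed

set_seed(42)  # seeds random, numpy, torch, and the XPU or CUDA RNGs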
@@ -257,12 +260,19 @@ def __call__(self):

class PPODecorators(object):
optimize_cuda_cache = False
optimize_device_cache = False

@classmethod
@contextmanager
def empty_cuda_cache(cls):
def empty_device_cache(cls):
yield
if cls.optimize_cuda_cache and torch.cuda.is_available():
gc.collect()
torch.cuda.empty_cache()
gc.collect()
if cls.optimize_device_cache:
    if is_xpu_available():
        gc.collect()
        torch.xpu.empty_cache()
        gc.collect()
    elif torch.cuda.is_available():
        gc.collect()
        torch.cuda.empty_cache()
        gc.collect()
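The renamed decorator is consumed exactly as before; a minimal sketch of how PPOTrainer applies it (a @contextmanager-based context manager doubles as a decorator, flushing the cache after the wrapped function returns):

from trl.core import PPODecorators

PPODecorators.optimize_device_cache = True  # PPOTrainer sets this from PPOConfig

@PPODecorators.empty_device_cache()
def training_step():
    ...  # forward/backward work; the device cache is emptied on exit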
3 changes: 2 additions & 1 deletion trl/models/modeling_base.py
@@ -19,6 +19,7 @@
import torch
import torch.nn as nn
from accelerate import Accelerator
from accelerate.utils import is_xpu_available
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import EntryNotFoundError, HFValidationError, LocalEntryNotFoundError
from transformers import PreTrainedModel
@@ -328,7 +329,7 @@ def _get_current_device(cls):
The current device.
"""
dummy_accelerator = Accelerator()
return dummy_accelerator.local_process_index if torch.cuda.is_available() else "cpu"
return dummy_accelerator.local_process_index if (torch.cuda.is_available() or is_xpu_available()) else "cpu"

@classmethod
def _split_kwargs(cls, kwargs):
2 changes: 1 addition & 1 deletion trl/trainer/ppo_config.py
@@ -100,7 +100,7 @@ class PPOConfig:
"""Maximum gradient norm for gradient clipping"""
seed: int = 0
"""Seed value for random generations"""
optimize_cuda_cache: bool = False
optimize_device_cache: bool = False
"""Optimize CUDA cache for slightly more memory-efficient training"""
early_stopping: bool = False
"""Whether to stop the PPO optimization loop early is the KL too high"""
15 changes: 9 additions & 6 deletions trl/trainer/ppo_trainer.py
@@ -24,7 +24,7 @@
import torch
import torch.nn.functional as F
from accelerate import Accelerator
from accelerate.utils import ProjectConfiguration, is_deepspeed_available
from accelerate.utils import ProjectConfiguration, is_deepspeed_available, is_xpu_available
from datasets import Dataset
from huggingface_hub import whoami
from packaging import version
@@ -341,9 +341,12 @@ def __init__(
if not getattr(self.model, "is_sequential_parallel", False):
self.current_device = self.accelerator.device
else:
self.current_device = torch.device("cuda:0")
if is_xpu_available():
self.current_device = torch.device("xpu:0")
else:
self.current_device = torch.device("cuda:0")

PPODecorators.optimize_cuda_cache = self.config.optimize_cuda_cache
PPODecorators.optimize_device_cache = self.config.optimize_device_cache

self.running = RunningMoments(self.accelerator)

@@ -576,7 +579,7 @@ def _step_safety_checker(

return queries, responses, scores, masks

@PPODecorators.empty_cuda_cache()
@PPODecorators.empty_device_cache()
def step(
self,
queries: List[torch.LongTensor],
@@ -909,7 +912,7 @@ def prepare_model_inputs(self, queries: torch.Tensor, responses: torch.Tensor):
input_data.pop("labels", None) # we don't want to compute LM losses
return input_data

@PPODecorators.empty_cuda_cache()
@PPODecorators.empty_device_cache()
def batched_forward_pass(
self,
model: PreTrainedModelWrapper,
@@ -1000,7 +1003,7 @@ def batched_forward_pass(
torch.cat(all_masks)[:, :-1],
)

@PPODecorators.empty_cuda_cache()
@PPODecorators.empty_device_cache()
def train_minibatch(
self,
old_logprobs: torch.FloatTensor,