From 4bc7d64083a46e139bd04f92dbe01891ea28ce83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Fri, 4 Oct 2024 14:33:10 +0000
Subject: [PATCH] Unsloth tag

---
 tests/test_utils.py                  |  2 +-
 trl/trainer/alignprop_trainer.py     |  7 +++++++
 trl/trainer/bco_trainer.py           |  7 +++++++
 trl/trainer/cpo_trainer.py           |  7 +++++++
 trl/trainer/ddpo_trainer.py          |  7 +++++++
 trl/trainer/dpo_trainer.py           |  7 +++++++
 trl/trainer/gkd_trainer.py           |  7 +++++++
 trl/trainer/iterative_sft_trainer.py |  7 +++++++
 trl/trainer/kto_trainer.py           |  7 +++++++
 trl/trainer/nash_md_trainer.py       |  7 +++++++
 trl/trainer/online_dpo_trainer.py    |  7 +++++++
 trl/trainer/orpo_trainer.py          |  7 +++++++
 trl/trainer/ppov2_trainer.py         |  7 +++++++
 trl/trainer/reward_trainer.py        |  7 +++++++
 trl/trainer/rloo_trainer.py          |  7 +++++++
 trl/trainer/sft_trainer.py           | 24 +++++++-----------------
 trl/trainer/utils.py                 |  8 ++------
 trl/trainer/xpo_trainer.py           |  7 +++++++
 18 files changed, 115 insertions(+), 24 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index d23e18c841..e79edc755f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -158,7 +158,7 @@ def test_val_none(self):
             model_name="my_model",
             hub_model_id="username/my_hub_model",
             dataset_name=None,
-            tags=None,
+            tags=[],
             wandb_url=None,
             trainer_name="My Trainer",
             trainer_citation=None,
diff --git a/trl/trainer/alignprop_trainer.py b/trl/trainer/alignprop_trainer.py
index 19342597da..84776a026b 100644
--- a/trl/trainer/alignprop_trainer.py
+++ b/trl/trainer/alignprop_trainer.py
@@ -415,6 +415,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{prabhudesai2024aligning,
             title        = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
diff --git a/trl/trainer/bco_trainer.py b/trl/trainer/bco_trainer.py
index a836e3b928..598bbab8bf 100644
--- a/trl/trainer/bco_trainer.py
+++ b/trl/trainer/bco_trainer.py
@@ -1475,6 +1475,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{jung2024binary,
             title        = {{Binary Classifier Optimization for Large Language Model Alignment}},
diff --git a/trl/trainer/cpo_trainer.py b/trl/trainer/cpo_trainer.py
index 738f563323..363e304e56 100644
--- a/trl/trainer/cpo_trainer.py
+++ b/trl/trainer/cpo_trainer.py
@@ -997,6 +997,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{xu2024contrastive,
             title        = {{Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation}},
diff --git a/trl/trainer/ddpo_trainer.py b/trl/trainer/ddpo_trainer.py
index df6cd94af1..412a461a30 100644
--- a/trl/trainer/ddpo_trainer.py
+++ b/trl/trainer/ddpo_trainer.py
@@ -617,6 +617,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{black2024training,
             title        = {{Training Diffusion Models with Reinforcement Learning}},
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index bc17ffdf39..d5e57fdba1 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -1715,6 +1715,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{rafailov2023direct,
             title        = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
diff --git a/trl/trainer/gkd_trainer.py b/trl/trainer/gkd_trainer.py
index 1dc96b4135..c0b9242768 100644
--- a/trl/trainer/gkd_trainer.py
+++ b/trl/trainer/gkd_trainer.py
@@ -300,6 +300,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{agarwal2024on-policy,
             title        = {{On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes}},
diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py
index 0e0d19f08e..c6cdeb158d 100644
--- a/trl/trainer/iterative_sft_trainer.py
+++ b/trl/trainer/iterative_sft_trainer.py
@@ -415,6 +415,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/kto_trainer.py b/trl/trainer/kto_trainer.py
index b670db9c6e..e2495d63b9 100644
--- a/trl/trainer/kto_trainer.py
+++ b/trl/trainer/kto_trainer.py
@@ -1453,6 +1453,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{ethayarajh2024kto,
             title        = {{KTO: Model Alignment as Prospect Theoretic Optimization}},
diff --git a/trl/trainer/nash_md_trainer.py b/trl/trainer/nash_md_trainer.py
index b15c0d5c59..ad46ad18df 100644
--- a/trl/trainer/nash_md_trainer.py
+++ b/trl/trainer/nash_md_trainer.py
@@ -413,6 +413,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{munos2024nash,
             title        = {Nash Learning from Human Feedback},
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index f495f5af1c..d9db374b87 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -573,6 +573,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{guo2024direct,
             title        = {{Direct Language Model Alignment from Online AI Feedback}},
diff --git a/trl/trainer/orpo_trainer.py b/trl/trainer/orpo_trainer.py
index 7aa991ad93..c18c92d345 100644
--- a/trl/trainer/orpo_trainer.py
+++ b/trl/trainer/orpo_trainer.py
@@ -1019,6 +1019,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{hong2024orpo,
             title        = {{ORPO: Monolithic Preference Optimization without Reference Model}},
diff --git a/trl/trainer/ppov2_trainer.py b/trl/trainer/ppov2_trainer.py
index e6fb9d70ef..63dec548dc 100644
--- a/trl/trainer/ppov2_trainer.py
+++ b/trl/trainer/ppov2_trainer.py
@@ -673,6 +673,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{mziegler2019fine-tuning,
             title        = {{Fine-Tuning Language Models from Human Preferences}},
diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index c1cf590372..cb239b4fcc 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -380,6 +380,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
index 516eeb4dfb..e4d2591f56 100644
--- a/trl/trainer/rloo_trainer.py
+++ b/trl/trainer/rloo_trainer.py
@@ -563,6 +563,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{ahmadian2024back,
             title        = {{Back to Basics: Revisiting REINFORCE-Style Optimization for Learning from Human Feedback in LLMs}},
diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py
index f6ee0f7b3f..c32a979081 100644
--- a/trl/trainer/sft_trainer.py
+++ b/trl/trainer/sft_trainer.py
@@ -15,7 +15,6 @@
 import inspect
 import os
 import warnings
-from functools import wraps
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
 import datasets
@@ -48,7 +47,6 @@
     DataCollatorForCompletionOnlyLM,
     generate_model_card,
     peft_module_casting_to_bf16,
-    trl_sanitze_kwargs_for_tagging,
 )
 
 
@@ -425,21 +423,6 @@ def make_inputs_require_grad(module, input, output):
         elif self.args.max_steps == -1 and args.packing:
             self.train_dataset.infinite = False
 
-    @wraps(Trainer.push_to_hub)
-    def push_to_hub(
-        self,
-        commit_message: Optional[str] = "End of training",
-        blocking: bool = True,
-        **kwargs,
-    ) -> str:
-        """
-        Overwrite the `push_to_hub` method in order to force-add the tag "sft" when pushing the
-        model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details.
-        Unlike the parent class, we don't use the `token` argument to mitigate security risks.
-        """
-        kwargs = trl_sanitze_kwargs_for_tagging(model=self.model, tag_names=self._tag_names, kwargs=kwargs)
-        return super().push_to_hub(commit_message=commit_message, blocking=blocking, **kwargs)
-
     def _prepare_dataset(
         self,
         dataset,
@@ -629,6 +612,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
index f1ed37f93f..1591b75ada 100644
--- a/trl/trainer/utils.py
+++ b/trl/trainer/utils.py
@@ -1388,7 +1388,7 @@ def generate_model_card(
     model_name: str,
     hub_model_id: str,
    dataset_name: Optional[str],
-    tags: Union[str, List[str], None],
+    tags: List[str],
     wandb_url: Optional[str],
     trainer_name: str,
     trainer_citation: Optional[str] = None,
@@ -1407,7 +1407,7 @@
             Hub model ID as `username/model_id`.
         dataset_name (`str` or `None`):
             Dataset name.
-        tags (`str`, `List[str]`, or `None`):
+        tags (`List[str]`):
             Tags.
         wandb_url (`str` or `None`):
             Weights & Biases run URL.
@@ -1424,10 +1424,6 @@
         `ModelCard`:
             A ModelCard object.
     """
-    if tags is None:
-        tags = []
-    elif isinstance(tags, str):
-        tags = [tags]
     card_data = ModelCardData(
         base_model=base_model,
         datasets=dataset_name,
diff --git a/trl/trainer/xpo_trainer.py b/trl/trainer/xpo_trainer.py
index 4fe778fb64..7f3e6a17e8 100644
--- a/trl/trainer/xpo_trainer.py
+++ b/trl/trainer/xpo_trainer.py
@@ -469,6 +469,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{jung2024binary,
             title        = {{Exploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHF}},
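
Note: every `create_model_card` above now performs the same tag normalization, which is why `generate_model_card` can require `tags: List[str]` and drop its own None/str handling, and why the patch keys off the `unsloth_version` attribute that Unsloth adds to patched model configs. A minimal, self-contained sketch of that behavior for reference; `DummyConfig`, `DummyModel`, and `normalize_tags` are illustrative stand-ins, not part of this patch or of TRL:

from typing import List, Union


class DummyConfig:
    # Stand-in for a model config: Unsloth-patched models carry an
    # `unsloth_version` attribute, which is the marker the patch checks for.
    unsloth_version = "2024.9"  # illustrative value


class DummyModel:
    config = DummyConfig()


def normalize_tags(model, tags: Union[str, List[str], None]) -> List[str]:
    # Accept None, a single string, or a list, and always return a list --
    # the same normalization repeated in each `create_model_card` above.
    tags = tags or []
    if isinstance(tags, str):
        tags = [tags]
    # Tag model cards generated from Unsloth-patched models.
    if hasattr(model.config, "unsloth_version"):
        tags.append("unsloth")
    return tags


print(normalize_tags(DummyModel(), "sft"))  # ['sft', 'unsloth']
print(normalize_tags(DummyModel(), None))   # ['unsloth']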