From 4bc7d64083a46e139bd04f92dbe01891ea28ce83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Fri, 4 Oct 2024 14:33:10 +0000
Subject: [PATCH] Unsloth tag

---
 tests/test_utils.py                  |  2 +-
 trl/trainer/alignprop_trainer.py     |  7 +++++++
 trl/trainer/bco_trainer.py           |  7 +++++++
 trl/trainer/cpo_trainer.py           |  7 +++++++
 trl/trainer/ddpo_trainer.py          |  7 +++++++
 trl/trainer/dpo_trainer.py           |  7 +++++++
 trl/trainer/gkd_trainer.py           |  7 +++++++
 trl/trainer/iterative_sft_trainer.py |  7 +++++++
 trl/trainer/kto_trainer.py           |  7 +++++++
 trl/trainer/nash_md_trainer.py       |  7 +++++++
 trl/trainer/online_dpo_trainer.py    |  7 +++++++
 trl/trainer/orpo_trainer.py          |  7 +++++++
 trl/trainer/ppov2_trainer.py         |  7 +++++++
 trl/trainer/reward_trainer.py        |  7 +++++++
 trl/trainer/rloo_trainer.py          |  7 +++++++
 trl/trainer/sft_trainer.py           | 24 +++++++-----------------
 trl/trainer/utils.py                 |  8 ++------
 trl/trainer/xpo_trainer.py           |  7 +++++++
 18 files changed, 115 insertions(+), 24 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index d23e18c841..e79edc755f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -158,7 +158,7 @@ def test_val_none(self):
             model_name="my_model",
             hub_model_id="username/my_hub_model",
             dataset_name=None,
-            tags=None,
+            tags=[],
             wandb_url=None,
             trainer_name="My Trainer",
             trainer_citation=None,
diff --git a/trl/trainer/alignprop_trainer.py b/trl/trainer/alignprop_trainer.py
index 19342597da..84776a026b 100644
--- a/trl/trainer/alignprop_trainer.py
+++ b/trl/trainer/alignprop_trainer.py
@@ -415,6 +415,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{prabhudesai2024aligning,
             title        = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
diff --git a/trl/trainer/bco_trainer.py b/trl/trainer/bco_trainer.py
index a836e3b928..598bbab8bf 100644
--- a/trl/trainer/bco_trainer.py
+++ b/trl/trainer/bco_trainer.py
@@ -1475,6 +1475,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{jung2024binary,
             title        = {{Binary Classifier Optimization for Large Language Model Alignment}},
diff --git a/trl/trainer/cpo_trainer.py b/trl/trainer/cpo_trainer.py
index 738f563323..363e304e56 100644
--- a/trl/trainer/cpo_trainer.py
+++ b/trl/trainer/cpo_trainer.py
@@ -997,6 +997,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{xu2024contrastive,
             title        = {{Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation}},
diff --git a/trl/trainer/ddpo_trainer.py b/trl/trainer/ddpo_trainer.py
index df6cd94af1..412a461a30 100644
--- a/trl/trainer/ddpo_trainer.py
+++ b/trl/trainer/ddpo_trainer.py
@@ -617,6 +617,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{black2024training,
             title        = {{Training Diffusion Models with Reinforcement Learning}},
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index bc17ffdf39..d5e57fdba1 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -1715,6 +1715,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{rafailov2023direct,
             title        = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
diff --git a/trl/trainer/gkd_trainer.py b/trl/trainer/gkd_trainer.py
index 1dc96b4135..c0b9242768 100644
--- a/trl/trainer/gkd_trainer.py
+++ b/trl/trainer/gkd_trainer.py
@@ -300,6 +300,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{agarwal2024on-policy,
             title        = {{On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes}},
diff --git a/trl/trainer/iterative_sft_trainer.py b/trl/trainer/iterative_sft_trainer.py
index 0e0d19f08e..c6cdeb158d 100644
--- a/trl/trainer/iterative_sft_trainer.py
+++ b/trl/trainer/iterative_sft_trainer.py
@@ -415,6 +415,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/kto_trainer.py b/trl/trainer/kto_trainer.py
index b670db9c6e..e2495d63b9 100644
--- a/trl/trainer/kto_trainer.py
+++ b/trl/trainer/kto_trainer.py
@@ -1453,6 +1453,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{ethayarajh2024kto,
             title        = {{KTO: Model Alignment as Prospect Theoretic Optimization}},
diff --git a/trl/trainer/nash_md_trainer.py b/trl/trainer/nash_md_trainer.py
index b15c0d5c59..ad46ad18df 100644
--- a/trl/trainer/nash_md_trainer.py
+++ b/trl/trainer/nash_md_trainer.py
@@ -413,6 +413,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{munos2024nash,
             title        = {Nash Learning from Human Feedback},
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index f495f5af1c..d9db374b87 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -573,6 +573,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{guo2024direct,
             title        = {{Direct Language Model Alignment from Online AI Feedback}},
diff --git a/trl/trainer/orpo_trainer.py b/trl/trainer/orpo_trainer.py
index 7aa991ad93..c18c92d345 100644
--- a/trl/trainer/orpo_trainer.py
+++ b/trl/trainer/orpo_trainer.py
@@ -1019,6 +1019,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{hong2024orpo,
             title        = {{ORPO: Monolithic Preference Optimization without Reference Model}},
diff --git a/trl/trainer/ppov2_trainer.py b/trl/trainer/ppov2_trainer.py
index e6fb9d70ef..63dec548dc 100644
--- a/trl/trainer/ppov2_trainer.py
+++ b/trl/trainer/ppov2_trainer.py
@@ -673,6 +673,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{mziegler2019fine-tuning,
             title        = {{Fine-Tuning Language Models from Human Preferences}},
diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index c1cf590372..cb239b4fcc 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -380,6 +380,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
index 516eeb4dfb..e4d2591f56 100644
--- a/trl/trainer/rloo_trainer.py
+++ b/trl/trainer/rloo_trainer.py
@@ -563,6 +563,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @inproceedings{ahmadian2024back,
             title        = {{Back to Basics: Revisiting REINFORCE-Style Optimization for Learning from Human Feedback in LLMs}},
diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py
index f6ee0f7b3f..c32a979081 100644
--- a/trl/trainer/sft_trainer.py
+++ b/trl/trainer/sft_trainer.py
@@ -15,7 +15,6 @@
 import inspect
 import os
 import warnings
-from functools import wraps
 from typing import Callable, Dict, List, Optional, Tuple, Union
 
 import datasets
@@ -48,7 +47,6 @@
     DataCollatorForCompletionOnlyLM,
     generate_model_card,
     peft_module_casting_to_bf16,
-    trl_sanitze_kwargs_for_tagging,
 )
 
 
@@ -425,21 +423,6 @@ def make_inputs_require_grad(module, input, output):
         elif self.args.max_steps == -1 and args.packing:
             self.train_dataset.infinite = False
 
-    @wraps(Trainer.push_to_hub)
-    def push_to_hub(
-        self,
-        commit_message: Optional[str] = "End of training",
-        blocking: bool = True,
-        **kwargs,
-    ) -> str:
-        """
-        Overwrite the `push_to_hub` method in order to force-add the tag "sft" when pushing the
-        model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details.
-        Unlike the parent class, we don't use the `token` argument to mitigate security risks.
-        """
-        kwargs = trl_sanitze_kwargs_for_tagging(model=self.model, tag_names=self._tag_names, kwargs=kwargs)
-        return super().push_to_hub(commit_message=commit_message, blocking=blocking, **kwargs)
-
     def _prepare_dataset(
         self,
         dataset,
@@ -629,6 +612,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         model_card = generate_model_card(
             base_model=base_model,
             model_name=model_name,
diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
index f1ed37f93f..1591b75ada 100644
--- a/trl/trainer/utils.py
+++ b/trl/trainer/utils.py
@@ -1388,7 +1388,7 @@ def generate_model_card(
     model_name: str,
     hub_model_id: str,
    dataset_name: Optional[str],
-    tags: Union[str, List[str], None],
+    tags: List[str],
     wandb_url: Optional[str],
     trainer_name: str,
     trainer_citation: Optional[str] = None,
@@ -1407,7 +1407,7 @@
             Hub model ID as `username/model_id`.
         dataset_name (`str` or `None`):
             Dataset name.
-        tags (`str`, `List[str]`, or `None`):
+        tags (`List[str]`):
             Tags.
         wandb_url (`str` or `None`):
             Weights & Biases run URL.
@@ -1424,10 +1424,6 @@
         `ModelCard`:
             A ModelCard object.
     """
-    if tags is None:
-        tags = []
-    elif isinstance(tags, str):
-        tags = [tags]
     card_data = ModelCardData(
         base_model=base_model,
         datasets=dataset_name,
diff --git a/trl/trainer/xpo_trainer.py b/trl/trainer/xpo_trainer.py
index 4fe778fb64..7f3e6a17e8 100644
--- a/trl/trainer/xpo_trainer.py
+++ b/trl/trainer/xpo_trainer.py
@@ -469,6 +469,13 @@ def create_model_card(
         else:
             base_model = None
 
+        tags = tags or []
+        if isinstance(tags, str):
+            tags = [tags]
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.append("unsloth")
+
         citation = textwrap.dedent("""\
         @article{jung2024binary,
             title        = {{Exploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHF}},
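
Note: every `create_model_card` above now performs the same tag normalization, which is why `generate_model_card` can require `tags: List[str]` and drop its own None/str handling, and why the patch keys off the `unsloth_version` attribute that Unsloth adds to patched model configs. A minimal, self-contained sketch of that behavior for reference; `DummyConfig`, `DummyModel`, and `normalize_tags` are illustrative stand-ins, not part of this patch or of TRL:

from typing import List, Union


class DummyConfig:
    # Stand-in for a model config: Unsloth-patched models carry an
    # `unsloth_version` attribute, which is the marker the patch checks for.
    unsloth_version = "2024.9"  # illustrative value


class DummyModel:
    config = DummyConfig()


def normalize_tags(model, tags: Union[str, List[str], None]) -> List[str]:
    # Accept None, a single string, or a list, and always return a list --
    # the same normalization repeated in each `create_model_card` above.
    tags = tags or []
    if isinstance(tags, str):
        tags = [tags]
    # Tag model cards generated from Unsloth-patched models.
    if hasattr(model.config, "unsloth_version"):
        tags.append("unsloth")
    return tags


print(normalize_tags(DummyModel(), "sft"))  # ['sft', 'unsloth']
print(normalize_tags(DummyModel(), None))   # ['unsloth']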