fix: Remove chat template setting from non-SFT trainer scripts (#4437)

behroozazarkhalili · qgallouedec · web-flow · commit 6f41b18e498d · 2025-11-03T10:57:51.000-08:00
Co-authored-by: Quentin Gallouédec <gallouedec.quentin@gmail.com>
Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
diff --git a/examples/scripts/cpo.py b/examples/scripts/cpo.py
@@ -64,7 +64,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
 
 from trl import CPOConfig, CPOTrainer, ModelConfig, ScriptArguments, get_peft_config
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -90,8 +89,6 @@
     # Dataset
     ################
     dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     ################
     # Training
diff --git a/examples/scripts/nash_md.py b/examples/scripts/nash_md.py
@@ -73,7 +73,6 @@
     get_kbit_device_map,
     get_quantization_config,
 )
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -128,8 +127,6 @@
     )
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)
 
diff --git a/examples/scripts/online_dpo.py b/examples/scripts/online_dpo.py
@@ -69,7 +69,6 @@
     get_peft_config,
     get_quantization_config,
 )
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -131,8 +130,6 @@
         trust_remote_code=model_args.trust_remote_code,
         **model_kwargs,
     )
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token = tokenizer.eos_token
 
diff --git a/examples/scripts/orpo.py b/examples/scripts/orpo.py
@@ -64,7 +64,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser
 
 from trl import ModelConfig, ORPOConfig, ORPOTrainer, ScriptArguments, get_peft_config
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -91,8 +90,6 @@
     # Dataset
     ################
     dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     ################
     # Training
diff --git a/examples/scripts/ppo/ppo.py b/examples/scripts/ppo/ppo.py
@@ -43,7 +43,6 @@
     get_peft_config,
     get_quantization_config,
 )
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -106,8 +105,6 @@
         model_args.model_name_or_path, padding_side="left", trust_remote_code=model_args.trust_remote_code
     )
     tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
     value_model = AutoModelForSequenceClassification.from_pretrained(
         training_args.reward_model_path, trust_remote_code=model_args.trust_remote_code, num_labels=1
     )
diff --git a/examples/scripts/ppo/ppo_tldr.py b/examples/scripts/ppo/ppo_tldr.py
@@ -43,7 +43,6 @@
     get_peft_config,
     get_quantization_config,
 )
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -113,8 +112,6 @@
         model_args.model_name_or_path, padding_side="left", trust_remote_code=model_args.trust_remote_code
     )
     tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
     value_model = AutoModelForSequenceClassification.from_pretrained(
         training_args.reward_model_path, trust_remote_code=model_args.trust_remote_code, num_labels=1
     )
diff --git a/examples/scripts/xpo.py b/examples/scripts/xpo.py
@@ -57,7 +57,6 @@
     get_kbit_device_map,
     get_quantization_config,
 )
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 
 # Enable logging in a Hugging Face Space
@@ -113,8 +112,6 @@
     )
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
-    if tokenizer.chat_template is None:
-        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)
 
diff --git a/tests/test_cpo_trainer.py b/tests/test_cpo_trainer.py
@@ -17,7 +17,6 @@
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
 from trl import CPOConfig, CPOTrainer
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 from .testing_utils import TrlTestCase, require_peft
 
@@ -33,15 +32,13 @@ def setup_method(self):
         model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     @pytest.mark.parametrize(
         "name, loss_type, config_name",
         [
             ("qwen", "sigmoid", "standard_preference"),
             ("t5", "hinge", "standard_implicit_prompt_preference"),
             ("qwen", "ipo", "conversational_preference"),
-            ("t5", "ipo", "conversational_implicit_prompt_preference"),
             ("qwen", "simpo", "standard_preference"),
             ("t5", "simpo", "standard_implicit_prompt_preference"),
             ("qwen", "hinge", "conversational_preference"),
diff --git a/tests/test_gkd_trainer.py b/tests/test_gkd_trainer.py
@@ -21,7 +21,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
 from trl import GKDConfig, GKDTrainer
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 from .testing_utils import TrlTestCase, require_liger_kernel
 
@@ -206,10 +205,6 @@ def setup_method(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
-        # Ensure the tokenizer has a chat template
-        if not hasattr(self.tokenizer, "chat_template") or self.tokenizer.chat_template is None:
-            self.tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
-
     def test_gkd_trainer(self):
         training_args = GKDConfig(
             output_dir=self.tmp_dir,
diff --git a/tests/test_orpo_trainer.py b/tests/test_orpo_trainer.py
@@ -17,7 +17,6 @@
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
 from trl import ORPOConfig, ORPOTrainer
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 from .testing_utils import TrlTestCase, require_peft
 
@@ -33,15 +32,13 @@ def setup_method(self):
         model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
 
     @pytest.mark.parametrize(
         "name, config_name",
         [
             ("qwen", "standard_preference"),
             ("t5", "standard_implicit_prompt_preference"),
             ("qwen", "conversational_preference"),
-            ("t5", "conversational_implicit_prompt_preference"),
         ],
     )
     def test_orpo_trainer(self, name, config_name):
diff --git a/tests/test_ppo_trainer.py b/tests/test_ppo_trainer.py
@@ -19,7 +19,6 @@
 from transformers.utils import is_peft_available
 
 from trl import PPOConfig, PPOTrainer
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 
 from .testing_utils import TrlTestCase, require_peft
 
@@ -37,9 +36,6 @@ def setup_method(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, padding_side="left")
         self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})
 
-        if self.tokenizer.chat_template is None:
-            self.tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
-
         # Add reward and value models as in ppo.py
         reward_model_id = "trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5"
         self.value_model = AutoModelForSequenceClassification.from_pretrained(reward_model_id, num_labels=1)
diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
@@ -738,9 +738,6 @@ def print_rich_table(df: pd.DataFrame) -> None:
     console.print(table)
 
 
-SIMPLE_SFT_CHAT_TEMPLATE = "{% for message in messages %}{{' ' + message['content']}}{% endfor %}{{ eos_token }}"
-# SIMPLE_SFT_CHAT_TEMPLATE simply ends things with an EOS token, this helps the SFT model learn to end the completions with EOS tokens
-
 SIMPLE_CHAT_TEMPLATE = "{% for message in messages %}{{message['role'].capitalize() + ': ' + message['content'] + '\n\n'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"
 
 

Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,6 @@`
`73`	`73`	`get_kbit_device_map,`
`74`	`74`	`get_quantization_config,`
`75`	`75`	`)`
`76`		`-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE`
`77`	`76`
`78`	`77`
`79`	`78`	`# Enable logging in a Hugging Face Space`
`@@ -128,8 +127,6 @@`
`128`	`127`	`)`
`129`	`128`	`if tokenizer.pad_token is None:`
`130`	`129`	`tokenizer.pad_token = tokenizer.eos_token`
`131`		`- if tokenizer.chat_template is None:`
`132`		`- tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE`
`133`	`130`
`134`	`131`	`dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)`
`135`	`132`
Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,6 @@`
`69`	`69`	`get_peft_config,`
`70`	`70`	`get_quantization_config,`
`71`	`71`	`)`
`72`		`-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE`
`73`	`72`
`74`	`73`
`75`	`74`	`# Enable logging in a Hugging Face Space`
`@@ -131,8 +130,6 @@`
`131`	`130`	`trust_remote_code=model_args.trust_remote_code,`
`132`	`131`	`**model_kwargs,`
`133`	`132`	`)`
`134`		`- if tokenizer.chat_template is None:`
`135`		`- tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE`
`136`	`133`	`if tokenizer.pad_token_id is None:`
`137`	`134`	`tokenizer.pad_token = tokenizer.eos_token`
`138`	`135`
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,6 @@`
`43`	`43`	`get_peft_config,`
`44`	`44`	`get_quantization_config,`
`45`	`45`	`)`
`46`		`-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE`
`47`	`46`
`48`	`47`
`49`	`48`	`# Enable logging in a Hugging Face Space`
`@@ -106,8 +105,6 @@`
`106`	`105`	`model_args.model_name_or_path, padding_side="left", trust_remote_code=model_args.trust_remote_code`
`107`	`106`	`)`
`108`	`107`	`tokenizer.add_special_tokens({"pad_token": "[PAD]"})`
`109`		`- if tokenizer.chat_template is None:`
`110`		`- tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE`
`111`	`108`	`value_model = AutoModelForSequenceClassification.from_pretrained(`
`112`	`109`	`training_args.reward_model_path, trust_remote_code=model_args.trust_remote_code, num_labels=1`
`113`	`110`	`)`
Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,6 @@`
`57`	`57`	`get_kbit_device_map,`
`58`	`58`	`get_quantization_config,`
`59`	`59`	`)`
`60`		`-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE`
`61`	`60`
`62`	`61`
`63`	`62`	`# Enable logging in a Hugging Face Space`
`@@ -113,8 +112,6 @@`
`113`	`112`	`)`
`114`	`113`	`if tokenizer.pad_token is None:`
`115`	`114`	`tokenizer.pad_token = tokenizer.eos_token`
`116`		`- if tokenizer.chat_template is None:`
`117`		`- tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE`
`118`	`115`
`119`	`116`	`dataset = load_dataset(script_args.dataset_name, name=script_args.dataset_config)`
`120`	`117`