From 85cefe29a35c2a06431307a8c58bc9052651d569 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Fri, 2 Aug 2024 14:56:10 +0000
Subject: [PATCH 1/3] drop setUp class in reward tester

---
 tests/test_reward_trainer.py | 55 ++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py
index e66e529481..e6c7b0f389 100644
--- a/tests/test_reward_trainer.py
+++ b/tests/test_reward_trainer.py
@@ -26,19 +26,16 @@
 
 
 class RewardTrainerTester(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
-        cls.model = AutoModelForSequenceClassification.from_pretrained(cls.model_id)
-        cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_id)
-        cls.tokenizer.pad_token = cls.tokenizer.eos_token
-
     def test_accuracy_metrics(self):
         dummy_eval_predictions = EvalPrediction(torch.FloatTensor([[0.1, 0.9], [0.9, 0.1]]), torch.LongTensor([0, 0]))
         accuracy = compute_accuracy(dummy_eval_predictions)
         assert accuracy["accuracy"] == 0.5
 
     def test_reward_trainer(self):
+        model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -81,9 +78,9 @@ def test_reward_trainer(self):
             dummy_dataset = Dataset.from_dict(dummy_dataset_dict)
 
             trainer = RewardTrainer(
-                model=self.model,
+                model=model,
                 args=training_args,
-                tokenizer=self.tokenizer,
+                tokenizer=tokenizer,
                 train_dataset=dummy_dataset,
                 eval_dataset=dummy_dataset,
             )
@@ -108,6 +105,11 @@ def test_reward_trainer(self):
     def test_reward_trainer_peft(self):
         from peft import LoraConfig, TaskType
 
+        model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
+
         peft_config = LoraConfig(
             task_type=TaskType.SEQ_CLS,
             inference_mode=False,
@@ -158,9 +160,9 @@ def test_reward_trainer_peft(self):
             dummy_dataset = Dataset.from_dict(dummy_dataset_dict)
 
             trainer = RewardTrainer(
-                model=self.model,
+                model=model,
                 args=training_args,
-                tokenizer=self.tokenizer,
+                tokenizer=tokenizer,
                 train_dataset=dummy_dataset,
                 eval_dataset=dummy_dataset,
                 peft_config=peft_config,
@@ -196,6 +198,10 @@ def test_reward_trainer_peft(self):
             assert preds.predictions.shape == (4, 2)
 
     def test_reward_trainer_assert_value_error(self):
+        model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -235,9 +241,9 @@ def test_reward_trainer_assert_value_error(self):
             dummy_dataset = Dataset.from_dict(dummy_dataset_dict)
 
             trainer = RewardTrainer(
-                model=self.model,
+                model=model,
                 args=training_args,
-                tokenizer=self.tokenizer,
+                tokenizer=tokenizer,
                 train_dataset=dummy_dataset,
             )
 
@@ -253,13 +259,17 @@ def test_reward_trainer_assert_value_error(self):
 
             with self.assertWarns(UserWarning):
                 trainer = RewardTrainer(
-                    model=self.model,
+                    model=model,
                     args=training_args,
-                    tokenizer=self.tokenizer,
+                    tokenizer=tokenizer,
                     train_dataset=dummy_dataset,
                 )
 
     def test_reward_trainer_margin(self):
+        model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -293,24 +303,27 @@ def test_reward_trainer_margin(self):
             dummy_dataset = Dataset.from_dict(dummy_dataset_dict)
 
             trainer = RewardTrainer(
-                model=self.model,
+                model=model,
                 args=training_args,
-                tokenizer=self.tokenizer,
+                tokenizer=tokenizer,
                 train_dataset=dummy_dataset,
                 eval_dataset=dummy_dataset,
            )
 
            batch = [dummy_dataset[0]]
            batch = trainer.data_collator(batch)
+           batch = {k: v.to(trainer.model.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
            loss, outputs = trainer.compute_loss(trainer.model, batch, return_outputs=True)
-
            l_val = -torch.nn.functional.logsigmoid(
                outputs["rewards_chosen"] - outputs["rewards_rejected"] - batch["margin"]
            ).mean()
-
            assert abs(loss - l_val) < 1e-6
 
     def test_reward_trainer_tags(self):
+        model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -353,9 +366,9 @@ def test_reward_trainer_tags(self):
             dummy_dataset = Dataset.from_dict(dummy_dataset_dict)
 
             trainer = RewardTrainer(
-                model=self.model,
+                model=model,
                 args=training_args,
-                tokenizer=self.tokenizer,
+                tokenizer=tokenizer,
                 train_dataset=dummy_dataset,
                 eval_dataset=dummy_dataset,
             )

From eec48703d156a326fe2aabaf41c188d48b22988a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Sat, 3 Aug 2024 09:41:43 +0000
Subject: [PATCH 2/3] report to none

---
 tests/test_reward_trainer.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py
index e6c7b0f389..0acc5fd5e3 100644
--- a/tests/test_reward_trainer.py
+++ b/tests/test_reward_trainer.py
@@ -36,6 +36,7 @@ def test_reward_trainer(self):
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -127,6 +128,7 @@ def test_reward_trainer_peft(self):
                 gradient_accumulation_steps=2,
                 learning_rate=9e-1,
                 eval_strategy="steps",
+                report_to="none",
             )
 
             # fmt: off
@@ -202,12 +204,14 @@ def test_reward_trainer_assert_value_error(self):
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
                 per_device_train_batch_size=2,
                 max_steps=1,
                 remove_unused_columns=False,
+                report_to="none",
             )
 
             # fmt: off
@@ -249,12 +253,15 @@ def test_reward_trainer_assert_value_error(self):
 
             with pytest.raises(ValueError):
                 trainer.train()
+
+
 
             training_args = RewardConfig(
                 output_dir=tmp_dir,
                 per_device_train_batch_size=2,
                 max_steps=1,
                 remove_unused_columns=True,
+                report_to="none",
             )
 
             with self.assertWarns(UserWarning):
@@ -270,6 +277,7 @@ def test_reward_trainer_margin(self):
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -279,6 +287,7 @@ def test_reward_trainer_margin(self):
                 gradient_accumulation_steps=4,
                 learning_rate=9e-1,
                 eval_strategy="steps",
+                report_to="none",
             )
 
             # fmt: off
@@ -314,9 +323,11 @@ def test_reward_trainer_margin(self):
             batch = trainer.data_collator(batch)
             batch = {k: v.to(trainer.model.device) if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
             loss, outputs = trainer.compute_loss(trainer.model, batch, return_outputs=True)
+
             l_val = -torch.nn.functional.logsigmoid(
                 outputs["rewards_chosen"] - outputs["rewards_rejected"] - batch["margin"]
             ).mean()
+
             assert abs(loss - l_val) < 1e-6
 
     def test_reward_trainer_tags(self):
@@ -324,6 +335,7 @@ def test_reward_trainer_tags(self):
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         tokenizer.pad_token = tokenizer.eos_token
+
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(
                 output_dir=tmp_dir,
@@ -333,6 +345,7 @@ def test_reward_trainer_tags(self):
                 gradient_accumulation_steps=4,
                 learning_rate=9e-1,
                 eval_strategy="steps",
+                report_to="none",
             )
 
             # fmt: off

From ad8e83b73195b670f6082a07cec93206e2621964 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?=
Date: Sat, 3 Aug 2024 09:55:22 +0000
Subject: [PATCH 3/3] style

---
 tests/test_reward_trainer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_reward_trainer.py b/tests/test_reward_trainer.py
index 0acc5fd5e3..f50255cae5 100644
--- a/tests/test_reward_trainer.py
+++ b/tests/test_reward_trainer.py
@@ -253,8 +253,6 @@ def test_reward_trainer_assert_value_error(self):
 
             with pytest.raises(ValueError):
                 trainer.train()
-
-
             training_args = RewardConfig(
                 output_dir=tmp_dir,