diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 0468b74360e..0785e03ac54 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1747,9 +1747,7 @@ def _inner_training_loop(
 
         # prepare using `accelerator` prepare
         if use_accelerator_prepare:
-            model, self.optimizer, self.lr_scheduler = self.accelerator.prepare(
-                self.model, self.optimizer, self.lr_scheduler
-            )
+            model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer)
 
         if self.is_fsdp_enabled:
             self.model = model
@@ -1996,6 +1994,7 @@ def _inner_training_loop(
                         optimizer_was_run = scale_before <= scale_after
                     else:
                         self.optimizer.step()
+                        optimizer_was_run = not self.accelerator.optimizer_step_was_skipped
 
                     if optimizer_was_run:
                         # Delay optimizer scheduling until metrics are generated
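
For context on the second hunk: it derives `optimizer_was_run` from Accelerate's `Accelerator.optimizer_step_was_skipped` property, which reports whether the last `optimizer.step()` was skipped (e.g. because mixed-precision gradient scaling hit inf/nan gradients), mirroring the `scale_before <= scale_after` check used on the `do_grad_scaling` path. Below is a minimal sketch of that pattern outside the Trainer, using a hypothetical toy model, optimizer, and scheduler for illustration; it is not code from this PR.

```python
import torch
from accelerate import Accelerator

# Plain Accelerator so the sketch runs anywhere; with mixed_precision="fp16"
# on GPU, the grad scaler can skip optimizer steps on inf/nan gradients.
accelerator = Accelerator()

# Hypothetical toy model, optimizer, and scheduler, for illustration only.
model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

# As in the patched Trainer: prepare only the model and optimizer,
# leaving the scheduler outside accelerator.prepare.
model, optimizer = accelerator.prepare(model, optimizer)

for _ in range(5):
    inputs = torch.randn(4, 8, device=accelerator.device)
    targets = torch.randint(0, 2, (4,), device=accelerator.device)
    loss = torch.nn.functional.cross_entropy(model(inputs), targets)
    accelerator.backward(loss)
    optimizer.step()
    # Counterpart of the diff's `optimizer_was_run` gate: only advance the
    # LR scheduler when the optimizer step actually ran.
    if not accelerator.optimizer_step_was_skipped:
        lr_scheduler.step()
    optimizer.zero_grad()
```

Since the first hunk keeps `self.lr_scheduler` out of `accelerator.prepare`, the Trainer steps the scheduler itself, and the `if optimizer_was_run:` gate keeps the learning rate from advancing on skipped steps.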