Skip to content

Commit

Permalink
Fix test for auto_find_batch_size on multi-GPU (huggingface#27947)
Browse files Browse the repository at this point in the history
* Fix test for multi-GPU

* With CPU handle
  • Loading branch information
muellerzr authored and iantbutler01 committed Dec 16, 2023
1 parent d715266 commit d0f6481
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions tests/trainer/test_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1558,7 +1558,7 @@ def test_auto_batch_size_with_resume_from_checkpoint(self):
class MockCudaOOMCallback(TrainerCallback):
def on_step_end(self, args, state, control, **kwargs):
# simulate OOM on the first step
if state.train_batch_size == 16:
if state.train_batch_size >= 16:
raise RuntimeError("CUDA out of memory.")

args = RegressionTrainingArguments(
Expand All @@ -1577,7 +1577,7 @@ def on_step_end(self, args, state, control, **kwargs):
# We can then make a new Trainer
trainer = Trainer(model, args, train_dataset=train_dataset)
# Check we are at 16 to start
self.assertEqual(trainer._train_batch_size, 16)
self.assertEqual(trainer._train_batch_size, 16 * max(trainer.args.n_gpu, 1))
trainer.train(resume_from_checkpoint=True)
# We should be back to 8 again, picking up based upon the last ran Trainer
self.assertEqual(trainer._train_batch_size, 8)
Expand Down

0 comments on commit d0f6481

Please sign in to comment.