Enable purely iteration-based training #5726

Merged
Changes from 13 commits
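In short, this PR lets a `Trainer` be bounded by step counts alone. A minimal sketch of the intended usage, assuming the `BoringModel` test helper used elsewhere in this PR (any `LightningModule` would do):

```python
from pytorch_lightning import Trainer
from tests.helpers import BoringModel  # test helper from this repo; any LightningModule works

# Purely iteration-based training: only max_steps is given.
# With this PR, max_epochs stays None (instead of defaulting to 1000) whenever
# max_steps is set, so training stops after exactly 100 optimizer steps.
trainer = Trainer(max_steps=100)
trainer.fit(BoringModel())
assert trainer.global_step == 100
```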
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -109,6 +109,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Changed the default value for the `progress_bar_refresh_rate` Trainer argument in Google COLAB notebooks to 20 ([#5516](https://github.com/PyTorchLightning/pytorch-lightning/pull/5516))


- Extended support for purely iteration-based training ([#5726](https://github.com/PyTorchLightning/pytorch-lightning/pull/5726))


- Made `LightningModule.global_rank`, `LightningModule.local_rank` and `LightningModule.logger` read-only properties ([#5730](https://github.com/PyTorchLightning/pytorch-lightning/pull/5730))


@@ -158,7 +158,7 @@ def restore_training_state(self, checkpoint):
self.trainer.current_epoch = checkpoint['epoch']

# crash if max_epochs is lower then the current epoch from the checkpoint
-if self.trainer.current_epoch > self.trainer.max_epochs:
+if self.trainer.max_epochs is not None and self.trainer.current_epoch > self.trainer.max_epochs:
m = f"""
you restored a checkpoint with current_epoch={self.trainer.current_epoch}
but the Trainer(max_epochs={self.trainer.max_epochs})
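For context, a hedged sketch of the situation the relaxed restore-time guard handles (the checkpoint path is purely illustrative, and the imports are the same as in the usage sketch above):

```python
from pytorch_lightning import Trainer
from tests.helpers import BoringModel

# Illustrative only: "some_run.ckpt" stands in for an existing checkpoint file.
# With max_epochs=None, the comparison current_epoch > max_epochs would raise a
# TypeError, so the restore-time check above now runs only when max_epochs is set;
# the resumed run here is bounded purely by max_steps.
trainer = Trainer(max_steps=100, resume_from_checkpoint="some_run.ckpt")
trainer.fit(BoringModel())
```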
16 changes: 10 additions & 6 deletions pytorch_lightning/trainer/trainer.py
@@ -14,6 +14,7 @@
"""Trainer to automate the training."""

import warnings
from itertools import count
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Union

@@ -101,8 +102,8 @@ def __init__(
check_val_every_n_epoch: int = 1,
fast_dev_run: Union[int, bool] = False,
accumulate_grad_batches: Union[int, Dict[int, int], List[list]] = 1,
-max_epochs: int = 1000,
-min_epochs: int = 1,
+max_epochs: Optional[int] = None,
+min_epochs: Optional[int] = None,
max_steps: Optional[int] = None,
min_steps: Optional[int] = None,
limit_train_batches: Union[int, float] = 1.0,
@@ -231,9 +232,11 @@ def __init__(

precision: Full precision (32), half precision (16). Can be used on CPU, GPU or TPUs.

-max_epochs: Stop training once this number of epochs is reached.
+max_epochs: Stop training once this number of epochs is reached. Disabled by default (None).
+    If both max_epochs and max_steps are not specified, defaults to ``max_epochs`` = 1000.

-min_epochs: Force training for at least these many epochs
+min_epochs: Force training for at least these many epochs. Disabled by default (None).
+    If both min_epochs and min_steps are not specified, defaults to ``min_epochs`` = 1.

max_steps: Stop training after this number of steps. Disabled by default (None).

@@ -586,7 +589,8 @@ def train(self):
if self.train_loop.should_skip_training():
return
# run all epochs
-for epoch in range(self.current_epoch, self.max_epochs):
+epochs = range(self.current_epoch, self.max_epochs) if self.max_epochs else count(self.current_epoch)
+for epoch in epochs:

# hook
self.train_loop.on_train_epoch_start(epoch)
@@ -599,7 +603,7 @@ def train(self):
return

# early stopping
-met_min_epochs = epoch >= self.min_epochs - 1
+met_min_epochs = epoch >= self.min_epochs - 1 if self.min_epochs else True
met_min_steps = self.global_step >= self.min_steps if self.min_steps else True

if self.should_stop:
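A standalone sketch of the loop construction used above, showing how `itertools.count` stands in for an unbounded `range` when `max_epochs` is None (the numbers are arbitrary and the helper is illustrative, independent of the Trainer):

```python
from itertools import count

def epoch_iterator(current_epoch, max_epochs):
    # Mirrors the logic in train(): a bounded range when max_epochs is given,
    # otherwise an open-ended counter starting at the current epoch.
    return range(current_epoch, max_epochs) if max_epochs else count(current_epoch)

# Bounded: epochs 3 and 4 remain when resuming at epoch 3 with max_epochs=5.
assert list(epoch_iterator(3, 5)) == [3, 4]

# Unbounded: with max_epochs=None the iterator never ends on its own;
# training must then be stopped by max_steps (or early stopping).
it = epoch_iterator(3, None)
assert [next(it) for _ in range(4)] == [3, 4, 5, 6]
```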
10 changes: 7 additions & 3 deletions pytorch_lightning/trainer/training_loop.py
@@ -71,8 +71,10 @@ def on_trainer_init(
self.trainer.train_dataloader = None
self.automatic_optimization = automatic_optimization

-self.trainer.max_epochs = max_epochs
-self.trainer.min_epochs = min_epochs
+# If neither max_epochs or max_steps is set, then use existing default of max_epochs = 1000
+self.trainer.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
+# If neither max_epochs or max_steps is set, then use existing default of min_epochs = 1
+self.trainer.min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs
self.trainer.max_steps = max_steps
self.trainer.min_steps = min_steps

@@ -93,7 +95,9 @@ def num_optimizers(self):
return num_optimizers

def should_skip_training(self):
-return self.trainer.current_epoch >= self.trainer.max_epochs or self.trainer.num_training_batches == 0
+return (
+    self.trainer.max_epochs is not None and self.trainer.current_epoch >= self.trainer.max_epochs
+) or self.trainer.num_training_batches == 0

def on_train_start(self):
# clear cache before training
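To make the default resolution above concrete, a small hedged sketch of how the four flags combine (the `resolve_limits` helper is illustrative, not part of the diff):

```python
def resolve_limits(max_epochs=None, min_epochs=None, max_steps=None, min_steps=None):
    # Illustrative re-statement of on_trainer_init's defaulting rule:
    # the epoch-based defaults only kick in when no step-based limit was given.
    max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
    min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs
    return max_epochs, min_epochs

assert resolve_limits() == (1000, 1)                 # no flags: old epoch-based defaults
assert resolve_limits(max_steps=100) == (None, 1)    # step limit given: no epoch cap
assert resolve_limits(min_steps=10, max_steps=100) == (None, None)  # purely iteration-based
```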
43 changes: 43 additions & 0 deletions tests/trainer/flags/test_min_max_epochs.py
@@ -0,0 +1,43 @@
import pytest

from pytorch_lightning import Trainer
from tests.helpers import BoringModel


# @pytest.mark.parametrize("min_epochs", [None, 2])
# @pytest.mark.parametrize("max_epochs", [None, 3])
# @pytest.mark.parametrize("min_steps", [None, 20])
# @pytest.mark.parametrize("max_steps", [None, 100])
Contributor:
Are all of these 16 configurations valid?
Did you try what happens if we choose max_epochs = 1 and max_steps = 5 but the epoch only has 3 batches?
The limit that leads to fewer training steps should terminate training, right? This could be a separate test.

Contributor Author:
If both are specified, I think it should exit as soon as the first condition is met. So for the example you provided, if the epoch has only 3 batches, I agree we should return after the epoch completes and before moving on to the remaining 2 steps. The same goes for min_steps.

I think users can already exercise this functionality now, so I'll check whether there are already test cases for it; if not, I'll add them here.

@pytest.mark.parametrize(
["min_epochs", "max_epochs", "min_steps", "max_steps"],
[
pytest.param(None, 5, None, None),
pytest.param(None, None, None, 100),
pytest.param(None, 5, None, 100),
pytest.param(None, None, 10, 100),
pytest.param(1, 5, None, None),
pytest.param(1, None, None, 100),
pytest.param(None, 5, 10, None),
],
)
def test_min_max_steps_epochs(tmpdir, min_epochs, max_epochs, min_steps, max_steps):
"""
Tests that max_steps can be used without max_epochs
"""
model = BoringModel()

trainer = Trainer(
default_root_dir=tmpdir,
min_epochs=min_epochs,
max_epochs=max_epochs,
min_steps=min_steps,
max_steps=max_steps,
weights_summary=None,
)

result = trainer.fit(model)
assert result == 1, "Training did not complete"

# check training stopped at max_epochs or max_steps
if trainer.max_steps and not trainer.max_epochs:
assert trainer.global_step == trainer.max_steps
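Following the review discussion above, a hedged sketch of the extra case that could be tested separately: whichever of `max_epochs` / `max_steps` is reached first should end training. The test name and batch counts are assumptions, not part of this PR, and the imports are the same as in the file above.

```python
def test_max_steps_vs_max_epochs_interaction(tmpdir):
    """Sketch: max_epochs=1 with only 3 batches per epoch should win over max_steps=5."""
    model = BoringModel()
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=3,  # assumed: caps each epoch at 3 batches
        max_epochs=1,
        max_steps=5,
        weights_summary=None,
    )
    trainer.fit(model)
    # Training should stop at the end of the single epoch (3 optimizer steps),
    # before the 5-step limit is ever reached.
    assert trainer.global_step == 3
```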
2 changes: 1 addition & 1 deletion tests/trainer/test_trainer_tricks.py
@@ -286,7 +286,7 @@ def test_auto_scale_batch_size_duplicate_attribute_warning(tmpdir):
model = EvalModelTemplate(**hparams)
model.hparams = hparams
# now we have model.batch_size and model.hparams.batch_size
-trainer = Trainer(default_root_dir=tmpdir, max_steps=1, auto_scale_batch_size=True)
+trainer = Trainer(default_root_dir=tmpdir, max_steps=1, max_epochs=1000, auto_scale_batch_size=True)
expected_message = "Field `model.batch_size` and `model.hparams.batch_size` are mutually exclusive!"
with pytest.warns(UserWarning, match=expected_message):
trainer.tune(model)