Connect the model to the training type plugin at the start of run #8536

Merged · 21 commits · Aug 4, 2021
Changes from 6 commits
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -338,6 +338,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Return the `default_root_dir` as the `log_dir` when the logger is a `LoggerCollection` ([#8187](https://github.com/PyTorchLightning/pytorch-lightning/pull/8187))


- The `trainer.lightning_module` reference is now properly set at the very beginning of the run ([#8536](https://github.com/PyTorchLightning/pytorch-lightning/pull/8536))


- The `model` argument of the data-loading `Trainer` functions is now optional ([#8536](https://github.com/PyTorchLightning/pytorch-lightning/pull/8536))


### Deprecated


@@ -434,6 +440,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Removed environment variable `PL_EXP_VERSION` from DDP subprocesses ([#7403](https://github.com/PyTorchLightning/pytorch-lightning/pull/7403))


- The accelerator and training type plugin `setup` hooks no longer have a `model` argument ([#8536](https://github.com/PyTorchLightning/pytorch-lightning/pull/8536))


### Fixed

- Fixed the `GPUStatsMonitor` callbacks to use the correct GPU IDs if `CUDA_VISIBLE_DEVICES` set ([#8260](https://github.com/PyTorchLightning/pytorch-lightning/pull/8260))
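The two #8536 entries above change the `setup` signatures. A minimal sketch of how a custom plugin adapts, assuming it subclasses `SingleDevicePlugin`; `MyCustomPlugin` is a hypothetical name, not part of this PR:

```python
from pytorch_lightning.plugins.training_type import SingleDevicePlugin


class MyCustomPlugin(SingleDevicePlugin):
    """Hypothetical plugin updated for the new hook signature."""

    # Before this PR the trainer passed the model in explicitly:
    #     def setup(self, model):
    #         model.to(self.root_device)

    # After this PR the model is already connected to the plugin when `setup`
    # runs, so the hook takes no `model` argument and uses `self.model` instead.
    def setup(self) -> None:
        self.model_to_device()
```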
2 changes: 1 addition & 1 deletion docs/source/governance.rst
@@ -39,7 +39,7 @@ Board

Alumni
------
- Jeff Yang (`ydcjeff <https://github.com/ydcjeff>`_)
- Jeff Yang (`ydcjeff <https://github.com/ydcjeff>`_)
- Jeff Ling (`jeffling <https://github.com/jeffling>`_)
- Teddy Koker (`teddykoker <https://github.com/teddykoker>`_)
- Nate Raw (`nateraw <https://github.com/nateraw>`_)
13 changes: 6 additions & 7 deletions pytorch_lightning/accelerators/accelerator.py
@@ -79,15 +79,14 @@ def setup_environment(self) -> None:
"""
self.training_type_plugin.setup_environment()

def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
def setup(self, trainer: 'pl.Trainer') -> None:
"""
Setup plugins for the trainer fit and creates optimizers.

Args:
trainer: the trainer instance
model: the LightningModule
"""
self.setup_training_type_plugin(model)
self.setup_training_type_plugin()
Review thread on this change:

Contributor: small note: This could be an issue if we ever decide to expose the Accelerator API.

Contributor Author: Why an issue? There's the connect hook to connect the model already. `self.model` should be available for the plugin when `setup` is called.

Contributor: @carmocca is this an invariant? Should the accelerator assert that the model is available before calling `setup_training_type_plugin`?

Contributor Author: @ananthsub We could.

carmocca marked this conversation as resolved.
if not self.training_type_plugin.setup_optimizers_in_pre_dispatch:
self.setup_optimizers(trainer)
self.setup_precision_plugin()
@@ -349,9 +348,9 @@ def setup_optimizers(self, trainer: 'pl.Trainer') -> None:
self.lr_schedulers = lr_schedulers
self.optimizer_frequencies = optimizer_frequencies

def setup_training_type_plugin(self, model: 'pl.LightningModule') -> None:
def setup_training_type_plugin(self) -> None:
"""Attaches the training type plugin to the accelerator."""
self.training_type_plugin.setup(model)
self.training_type_plugin.setup()

def setup_precision_plugin(self) -> None:
"""Attaches the precision plugin to the accelerator"""
@@ -464,7 +463,7 @@ def model_sharded_context(self) -> Generator[None, None, None]:
yield

# todo: remove in v1.5
def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: 'pl.LightningModule') -> None:
def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: 'pl.LightningModule') -> None: # noqa
"""
Attaches the training type plugin to the accelerator.
Also transfers ownership of the model to this plugin
@@ -476,7 +475,7 @@ def connect_training_type_plugin(self, plugin: TrainingTypePlugin, model: 'pl.Li
'Accelerator method `connect_training_type_plugin` was deprecated in v1.3.'
' It will be removed in v1.5.'
)
self.setup_training_type_plugin(model)
self.setup_training_type_plugin()

# todo: remove in v1.5
def connect_precision_plugin(self, plugin: PrecisionPlugin) -> None:
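The review thread above asks whether the accelerator should assert that a model is already available before `setup_training_type_plugin` runs. No such check is part of this diff; the following is only a sketch of what it could look like, written as a hypothetical `CPUAccelerator` subclass:

```python
import pytorch_lightning as pl
from pytorch_lightning.accelerators import CPUAccelerator


class CheckedCPUAccelerator(CPUAccelerator):
    """Hypothetical accelerator adding the check discussed in the review thread."""

    def setup(self, trainer: 'pl.Trainer') -> None:
        # `connect` runs before `setup` at the start of the run, so the training
        # type plugin is expected to hold the LightningModule by this point.
        if self.training_type_plugin.model is None:
            raise RuntimeError(
                "Accelerator.setup was called before a LightningModule was"
                " connected to the training type plugin."
            )
        return super().setup(trainer)
```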
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/cpu.py
@@ -20,7 +20,7 @@
class CPUAccelerator(Accelerator):
""" Accelerator for CPU devices. """

def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
def setup(self, trainer: 'pl.Trainer') -> None:
"""
Raises:
MisconfigurationException:
@@ -36,4 +36,4 @@ def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
if "cpu" not in str(self.root_device):
raise MisconfigurationException(f"Device should be CPU, got {self.root_device} instead.")

return super().setup(trainer, model)
return super().setup(trainer)
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/gpu.py
@@ -32,14 +32,14 @@ def setup_environment(self) -> None:
raise MisconfigurationException(f"Device should be GPU, got {self.root_device} instead")
torch.cuda.set_device(self.root_device)

def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
def setup(self, trainer: 'pl.Trainer') -> None:
"""
Raises:
MisconfigurationException:
If the selected device is not GPU.
"""
self.set_nvidia_flags(trainer.local_rank)
return super().setup(trainer, model)
return super().setup(trainer)

def on_train_start(self) -> None:
# clear cache before training
4 changes: 2 additions & 2 deletions pytorch_lightning/accelerators/tpu.py
@@ -32,7 +32,7 @@
class TPUAccelerator(Accelerator):
""" Accelerator for TPU devices. """

def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
def setup(self, trainer: 'pl.Trainer') -> None:
"""
Raises:
MisconfigurationException:
@@ -45,7 +45,7 @@ def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:

if not isinstance(self.training_type_plugin, (SingleTPUPlugin, TPUSpawnPlugin)):
raise MisconfigurationException("TPUs only support a single tpu core or tpu spawn training.")
return super().setup(trainer, model)
return super().setup(trainer)

def run_optimizer_step(
self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any
5 changes: 2 additions & 3 deletions pytorch_lightning/loops/dataloader/evaluation_loop.py
@@ -169,11 +169,10 @@ def get_max_batches(self) -> List[Union[int, float]]:

def reload_evaluation_dataloaders(self) -> None:
"""Reloads dataloaders if necessary"""
model = self.trainer.lightning_module
if self.trainer.testing:
self.trainer.reset_test_dataloader(model)
self.trainer.reset_test_dataloader()
elif self.trainer.val_dataloaders is None or self.trainer._should_reload_dl_epoch:
self.trainer.reset_val_dataloader(model)
self.trainer.reset_val_dataloader()

def on_evaluation_start(self, *args: Any, **kwargs: Any) -> None:
"""Runs ``on_{validation/test}_start`` hooks"""
3 changes: 1 addition & 2 deletions pytorch_lightning/plugins/training_type/ddp2.py
@@ -29,8 +29,7 @@ def global_rank(self) -> int:
def world_size(self) -> int:
return self.num_nodes

def setup(self, model):
self._model = model
def setup(self) -> None:
# set the task idx
self.task_idx = self.cluster_environment.local_rank()
# the difference to DDP is that we don't call children processes here
2 changes: 1 addition & 1 deletion pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -136,7 +136,7 @@ def distributed_sampler_kwargs(self):
def _is_single_process_single_device(self):
return True

def setup(self, model):
def setup(self) -> None:
os.environ["MASTER_PORT"] = str(self.cluster_environment.master_port())
# pass in a state q
smp = mp.get_context("spawn")
11 changes: 5 additions & 6 deletions pytorch_lightning/plugins/training_type/dp.py
@@ -48,10 +48,10 @@ def node_rank(self) -> int:
def world_size(self) -> int:
return 1

def setup(self, model):
def setup(self) -> None:
# model needs to be moved to the device before it is wrapped
model.to(self.root_device)
self._model = DataParallel(LightningParallelModule(model), self.parallel_devices)
self.model_to_device()
self._model = DataParallel(LightningParallelModule(self._model), self.parallel_devices)

def reduce(self, collection: _METRIC_COLLECTION, *args, **kwargs) -> _METRIC_COLLECTION:
"""
@@ -76,9 +76,8 @@ def mean(t: torch.Tensor) -> torch.Tensor:
def root_device(self):
return self.parallel_devices[0]

def model_to_device(self):
# no need to do anything when model is wrapped in torch.nn.DataParallel
pass
def model_to_device(self) -> None:
self._model.to(self.root_device)

def barrier(self, *args, **kwargs):
pass
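The `dp.py` change above reuses `model_to_device()` so the module sits on the root device before it is wrapped. The same ordering applies to plain `torch.nn.DataParallel`; a standalone sketch with a throwaway model, assuming at least one CUDA device is available:

```python
import torch
from torch import nn

# Throwaway module purely for illustration.
model = nn.Linear(32, 2)

if torch.cuda.is_available():
    root_device = torch.device("cuda", 0)
    # DataParallel expects the module's parameters and buffers to live on
    # device_ids[0] before forward is called, so move first, then wrap.
    model = model.to(root_device)
    model = nn.DataParallel(model, device_ids=list(range(torch.cuda.device_count())))
    output = model(torch.randn(8, 32, device=root_device))
```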
3 changes: 1 addition & 2 deletions pytorch_lightning/plugins/training_type/horovod.py
@@ -58,8 +58,7 @@ def distributed_sampler_kwargs(self):
distributed_sampler_kwargs = dict(num_replicas=self.world_size, rank=self.global_rank)
return distributed_sampler_kwargs

def setup(self, model):
self._model = model
def setup(self) -> None:
self.model_to_device()

def pre_dispatch(self):
3 changes: 1 addition & 2 deletions pytorch_lightning/plugins/training_type/single_device.py
@@ -63,9 +63,8 @@ def root_device(self) -> torch.device:
def model_to_device(self) -> None:
self._model.to(self.root_device)

def setup(self, model: torch.nn.Module) -> torch.nn.Module:
def setup(self) -> None:
self.model_to_device()
return self.model

@property
def is_global_zero(self) -> bool:
3 changes: 1 addition & 2 deletions pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -112,9 +112,8 @@ def pre_dispatch(self):
if self.debug:
os.environ["PT_XLA_DEBUG"] = str(1)

def setup(self, model: Module) -> Module:
def setup(self) -> None:
self.create_mp_queue()
return self.model

def create_mp_queue(self):
self.start_method = 'fork'
@@ -54,7 +54,7 @@ def setup_environment(self) -> None:
which allows the user to access the accelerator environment before setup is complete.
"""

def setup(self, model: Module) -> None:
def setup(self) -> None:
"""Called by the accelerator to finish setup."""

@property
12 changes: 6 additions & 6 deletions pytorch_lightning/trainer/callback_hook.py
@@ -36,20 +36,20 @@ class TrainerCallbackHookMixin(ABC):
callbacks: List[Callback] = []
lightning_module: 'pl.LightningModule'

def on_before_accelerator_backend_setup(self, model: 'pl.LightningModule') -> None:
def on_before_accelerator_backend_setup(self) -> None:
"""Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
for callback in self.callbacks:
callback.on_before_accelerator_backend_setup(self, model)
callback.on_before_accelerator_backend_setup(self, self.lightning_module)

def configure_sharded_model(self, model: 'pl.LightningModule') -> None:
def on_configure_sharded_model(self) -> None:
"""Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
for callback in self.callbacks:
callback.on_configure_sharded_model(self, model)
callback.on_configure_sharded_model(self, self.lightning_module)

def setup(self, model: 'pl.LightningModule', stage: Optional[str]) -> None:
def setup(self, stage: Optional[str]) -> None:
"""Called at the beginning of fit (train + validate), validate, test, or predict, or tune."""
for callback in self.callbacks:
callback.setup(self, model, stage=stage)
callback.setup(self, self.lightning_module, stage=stage)

def teardown(self, stage: Optional[str] = None) -> None:
"""Called at the end of fit (train + validate), validate, test, or predict, or tune."""
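These trainer-side hooks now read `self.lightning_module` instead of receiving the model as an argument; the user-facing `Callback` signatures they dispatch to are unchanged. A minimal, hypothetical callback for reference:

```python
from pytorch_lightning import Callback


class LogModelClassCallback(Callback):
    """Hypothetical callback; its `setup` signature is unaffected by this PR."""

    def setup(self, trainer, pl_module, stage=None):
        # `pl_module` is still the LightningModule; the trainer now supplies its
        # own `lightning_module` reference when it dispatches this hook.
        print(f"setup({stage}): received a {type(pl_module).__name__}")
```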
17 changes: 5 additions & 12 deletions pytorch_lightning/trainer/connectors/callback_connector.py
@@ -15,7 +15,6 @@
from datetime import timedelta
from typing import Dict, List, Optional, Union

import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback, ModelCheckpoint, ProgressBar, ProgressBarBase
from pytorch_lightning.callbacks.timer import Timer
from pytorch_lightning.utilities import rank_zero_info
@@ -136,25 +135,19 @@ def attach_model_logging_functions(self, model):
callback.log = model.log
callback.log_dict = model.log_dict

@staticmethod
def _attach_model_callbacks(model: 'pl.LightningModule', trainer) -> None:
def _attach_model_callbacks(self) -> None:
"""
Attaches the callbacks defined in the model.
If a callback returned by the model's configure_callback method has the same type as one or several
callbacks already present in the trainer callbacks list, it will replace them.
In addition, all :class:`~pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint` callbacks
will be pushed to the end of the list, ensuring they run last.

Args:
model: A model which may or may not define new callbacks in
:meth:`~pytorch_lightning.core.lightning.LightningModule.configure_callbacks`.
trainer: The trainer on which the callbacks get attached/merged.
"""
model_callbacks = model.configure_callbacks()
model_callbacks = self.trainer.model.configure_callbacks()
if not model_callbacks:
return
model_callback_types = {type(c) for c in model_callbacks}
trainer_callback_types = {type(c) for c in trainer.callbacks}
trainer_callback_types = {type(c) for c in self.trainer.callbacks}
override_types = model_callback_types.intersection(trainer_callback_types)
if override_types:
rank_zero_info(
@@ -163,11 +156,11 @@ def _attach_model_callbacks(model: 'pl.LightningModule', trainer) -> None:
f" {', '.join(sorted(t.__name__ for t in override_types))}"
)
# remove all callbacks with a type that occurs in model callbacks
all_callbacks = [c for c in trainer.callbacks if type(c) not in override_types]
all_callbacks = [c for c in self.trainer.callbacks if type(c) not in override_types]
all_callbacks.extend(model_callbacks)
all_callbacks = CallbackConnector._reorder_callbacks(all_callbacks)
# TODO: connectors refactor: move callbacks list to connector and do not write Trainer state
trainer.callbacks = all_callbacks
self.trainer.callbacks = all_callbacks

@staticmethod
def _reorder_callbacks(callbacks: List[Callback]) -> List[Callback]:
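A short sketch of the override behavior described in the `_attach_model_callbacks` docstring, using standard Lightning callbacks; `ModelWithCallbacks` is a hypothetical stub, not part of this PR:

```python
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint


class ModelWithCallbacks(LightningModule):
    def configure_callbacks(self):
        # Shares a type with the EarlyStopping passed to the Trainer below,
        # so it replaces that one when the callbacks are attached.
        return [EarlyStopping(monitor="val_loss", patience=10)]


trainer = Trainer(
    callbacks=[EarlyStopping(monitor="val_loss", patience=3), ModelCheckpoint()]
)
# When a run starts with ModelWithCallbacks, the trainer's EarlyStopping(patience=3)
# is dropped in favour of the model's, and ModelCheckpoint callbacks are moved to
# the end of the list so they run last.
```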
4 changes: 2 additions & 2 deletions pytorch_lightning/trainer/connectors/data_connector.py
@@ -67,13 +67,13 @@ def get_profiled_train_dataloader(self, train_dataloader):
)
return profiled_dl

def prepare_data(self, model):
def prepare_data(self) -> None:
# on multi-gpu jobs we only want to manipulate (download, etc) on node_rank=0, local_rank=0
# or in the case where each node needs to do its own manipulation in which case just local_rank=0
if self.can_prepare_data():
if self.trainer.datamodule is not None:
self.trainer.datamodule.prepare_data()
model.prepare_data()
self.trainer.lightning_module.prepare_data()
self.trainer._is_data_prepared = True

def can_prepare_data(self):
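A simplified, standalone sketch of the rank gating described in the comment inside `prepare_data`; it ignores the `_is_data_prepared` bookkeeping and the datamodule branch, and `should_prepare_data` is an illustrative helper, not a real Trainer method:

```python
def should_prepare_data(prepare_data_per_node: bool, node_rank: int, local_rank: int) -> bool:
    """With prepare_data_per_node=True, local rank 0 on every node prepares the data;
    otherwise only the process that is rank 0 on node 0 (global rank 0) does."""
    if prepare_data_per_node:
        return local_rank == 0
    return node_rank == 0 and local_rank == 0


# Each node has its own local storage: every node's local rank 0 downloads.
assert should_prepare_data(True, node_rank=1, local_rank=0)
# Shared filesystem: only global rank 0 downloads.
assert not should_prepare_data(False, node_rank=1, local_rank=0)
```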