update CHANGELOG and fix bugs in ddp and ddp_spawn
ninginthecloud committed Aug 9, 2021
1 parent 63fb7ea commit 98fe2b0
Showing 3 changed files with 7 additions and 3 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -127,7 +127,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).


- Fixed an issue with `training_step` outputs not getting collected correctly for `training_epoch_end` ([#8613](https://github.com/PyTorchLightning/pytorch-lightning/pull/8613))
- Fixed avoid wrapping LightningModule in *DataParallel overrides when not fitting ([#6977](https://github.com/PyTorchLightning/pytorch-lightning/issues/6977))


- Fixed save/load/resume from checkpoint for DeepSpeed Plugin (
@@ -141,6 +140,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8333](https://github.com/PyTorchLightning/pytorch-lightning/issues/8333))

- Fixed the DDP-based plugins to avoid wrapping the LightningModule in `*DataParallel` overrides when not fitting ([#6977](https://github.com/PyTorchLightning/pytorch-lightning/issues/6977)). Specifically (a condensed sketch follows this file's diff):
  - Updated `configure_ddp` in `DDPPlugin`, `DDPSpawnPlugin`, `DDPShardedPlugin` and `DDPSpawnShardedPlugin` to check the trainer state through the LightningModule and skip wrapping it in `*DataParallel` when the state is not `TrainerFn.FITTING`.
  - Updated `validation_step` in `DDPPlugin` and `DDPSpawnPlugin` to call the LightningModule's `validation_step` when `self.model` is not a `DistributedDataParallel` instance.
  - Updated `test_step` and `predict_step` in `DDPPlugin` and `DDPSpawnPlugin` to call the LightningModule's `*_step` methods directly.


## [1.4.0] - 2021-07-27

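The CHANGELOG bullets above compress three related changes. Below is a condensed sketch, not the actual source, of the resulting plugin behavior; it assumes the 1.4.x training-type plugin API, and `lightning_module`, `pre_configure_ddp`, `determine_ddp_device_ids`, `_ddp_kwargs` and `_register_ddp_hooks` are attributes of the real plugins, referenced here only for shape:

```python
# Condensed sketch (not the actual source) of the behavior described in the
# CHANGELOG entry above; names follow the 1.4.x training-type plugin API.
from torch.nn.parallel import DistributedDataParallel

from pytorch_lightning.overrides import LightningDistributedModule
from pytorch_lightning.trainer.states import TrainerFn


class DDPPluginSketch:
    def configure_ddp(self) -> None:
        trainer_fn = self.lightning_module.trainer.state.fn
        if trainer_fn != TrainerFn.FITTING:
            # validate/test/predict runs: leave the LightningModule unwrapped.
            return
        self.pre_configure_ddp()  # may tweak self._ddp_kwargs, so it runs first
        self._model = DistributedDataParallel(
            LightningDistributedModule(self.model),
            device_ids=self.determine_ddp_device_ids(),
            **self._ddp_kwargs,
        )
        self._register_ddp_hooks()

    def validation_step(self, *args, **kwargs):
        if isinstance(self.model, DistributedDataParallel):
            # Fitting: route through the DDP wrapper's forward.
            return self.model(*args, **kwargs)
        # Not fitting: the model was never wrapped, call the LightningModule directly.
        return self.lightning_module.validation_step(*args, **kwargs)

    def test_step(self, *args, **kwargs):
        return self.lightning_module.test_step(*args, **kwargs)

    def predict_step(self, *args, **kwargs):
        return self.lightning_module.predict_step(*args, **kwargs)
```

The `isinstance` check is what lets validation and test runs hit the bare LightningModule while `trainer.fit()` still goes through the DDP forward.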
2 changes: 1 addition & 1 deletion pytorch_lightning/plugins/training_type/ddp.py
@@ -305,10 +305,10 @@ def configure_ddp(self) -> None:
        if trainer_fn != TrainerFn.FITTING:
            rank_zero_debug(f"In {trainer_fn} stage: Skipping wrapping the model with DistributedDataParallel")
            return
        self.pre_configure_ddp()
        self._model = DistributedDataParallel(
            LightningDistributedModule(self.model), device_ids=self.determine_ddp_device_ids(), **self._ddp_kwargs
        )
        self.pre_configure_ddp()
        self._register_ddp_hooks()

    def determine_ddp_device_ids(self):
2 changes: 1 addition & 1 deletion pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -254,10 +254,10 @@ def configure_ddp(self) -> None:
        if trainer_fn != TrainerFn.FITTING:
            rank_zero_debug(f"In {trainer_fn} stage: Skipping wrapping the model with DistributedDataParallel")
            return
        self.pre_configure_ddp()
        self._model = DistributedDataParallel(
            LightningDistributedModule(self.model), device_ids=self.determine_ddp_device_ids(), **self._ddp_kwargs
        )
        self.pre_configure_ddp()
        self._register_ddp_hooks()

    def init_ddp_connection(self, global_rank: Optional[int], world_size: Optional[int]) -> None:
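Note that the flattened diffs above drop the +/- markers: in both `ddp.py` and `ddp_spawn.py` the fix appears to move `self.pre_configure_ddp()` ahead of the `DistributedDataParallel` construction (the second, post-constructor call being the removed occurrence), so any `_ddp_kwargs` it adjusts take effect before the wrapper is built. Below is a hypothetical end-to-end snippet exercising the non-fitting path that the CHANGELOG entry describes; `BoringValModel` is a placeholder and the Trainer flags assume the 1.4-era API with two visible GPUs:

```python
# Hypothetical usage exercising the non-fitting path (placeholder model and
# random data; Trainer flags assume the 1.4-era API and two visible GPUs).
import torch
from torch.utils.data import DataLoader, TensorDataset

import pytorch_lightning as pl


class BoringValModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def validation_step(self, batch, batch_idx):
        (x,) = batch
        self.log("val_loss", self.layer(x).sum())


if __name__ == "__main__":
    val_loader = DataLoader(TensorDataset(torch.randn(64, 32)), batch_size=8)
    trainer = pl.Trainer(gpus=2, accelerator="ddp")
    # validate() runs outside TrainerFn.FITTING, so configure_ddp() skips the
    # DistributedDataParallel wrapper and validation_step above is called on
    # the LightningModule directly.
    trainer.validate(BoringValModel(), val_loader)
```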
