From a96ff632f2b8e76ed94dd7555c2c11e13291fe67 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 14:09:54 +0100 Subject: [PATCH 01/14] update --- .../pytorch_2_lite_2_lightning.py | 232 ++++++++++++++++++ .../lite_examples/simple/mnist_example.py | 13 + pytorch_lightning/lite/lite.py | 9 +- pytorch_lightning/lite/wrappers.py | 4 +- 4 files changed, 251 insertions(+), 7 deletions(-) create mode 100644 pl_examples/lite_examples/pytorch_2_lite_2_lightning.py diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py new file mode 100644 index 0000000000000..694ff729fc6aa --- /dev/null +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -0,0 +1,232 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import torch +from torch import nn +from torch.utils.data import DataLoader, Dataset + +from pytorch_lightning import seed_everything +from pytorch_lightning.lite import LightningLite + +############################################################################################# +# Section 1: PyTorch to Lightning Lite # +# # +# What is LightningLite ? # +# # +# `LightningLite` is a python class you can override to get access to Lightning # +# accelerators and scale your training, but furthermore, it is intentend to be the safe # +# route to fully transition to Lightning. # +# # +# Does LightningLite requires code changes ? # +# # +# `LightningLite` code changes are minimal and this tutorial will show you easy it is to # +# convert using a BoringModel to `LightningLite`. # +# # +############################################################################################# + +############################################################################################# +# Pure PyTorch Section # +############################################################################################# + + +# 1 / 6: Implement a BoringModel with only one layer. +class BoringModel(nn.Module): + def __init__(self): + super().__init__() + self.layer = torch.nn.Linear(32, 2) + + def forward(self, x): + x = self.layer(x) + return torch.nn.functional.mse_loss(x, torch.ones_like(x)) + + +# 2 / 6: Implement a `configure_optimizers` taking a and returning an optimizer + + +def configure_optimizers(module: nn.Module): + return torch.optim.SGD(module.parameters(), lr=0.001) + + +# 3 / 6: Implement a simple dataset returning random data with the specificed shape + + +class RandomDataset(Dataset): + def __init__(self, length: int, size: int): + self.len = length + self.data = torch.randn(length, size) + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return self.len + + +# 4 / 6: Implement the functions to create the dataloaders. + + +def train_dataloader(): + return DataLoader(RandomDataset(64, 32)) + + +def val_dataloader(): + return DataLoader(RandomDataset(64, 32)) + + +# 5 / 6: Our main PyTorch Loop to train our `BoringModel` on our random data. 
+ + +def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoader, num_epochs: int = 10): + optimizer = configure_optimizers(model) + + for epoch in range(num_epochs): + train_losses = [] + val_losses = [] + + for batch in train_dataloader: + optimizer.zero_grad() + loss = model(batch) + train_losses.append(loss) + loss.backward() + optimizer.step() + + for batch in val_dataloader: + val_losses.append(model(batch)) + + train_epoch_loss = torch.stack(train_losses).mean() + val_epoch_loss = torch.stack(val_losses).mean() + + print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") + print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") + + return model.state_dict() + + +# 6 / 6: Run the pure PyTorch Loop and train / validate the model. +seed_everything(42) +model = BoringModel() +pure_model_weights = main(model, train_dataloader(), val_dataloader()) + + +############################################################################################# +# Convert to LightningLite # +# # +# By converting the `LightningLite`, you get the full power of Lightning accelerators # +# while conversing your original code ! # +# To get started, you would need to `from pytorch_lightning.lite import LightningLite` # +# and override its `run` method. # +############################################################################################# + + +class LiteTrainer(LightningLite): + def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoader, num_epochs: int = 10): + optimizer = configure_optimizers(model) + + ################################################################## + # You would need to call `self.setup` to wrap `model` # + # and `optimizer`. If you have multiple models (c.f GAN), # + # call `setup` for each one of them and their associated # + # optimizers # + model, optimizer = self.setup(model=model, optimizers=optimizer) # + ################################################################## + + for epoch in range(num_epochs): + train_losses = [] + val_losses = [] + + for batch in train_dataloader: + optimizer.zero_grad() + loss = model(batch) + train_losses.append(loss) + ################################################################## + # By calling `self.backward` directly, `LightningLite` will # + # automate precision and distributions. 
# + self.backward(loss) # # + ################################################################## + optimizer.step() + + for batch in val_dataloader: + val_losses.append(model(batch)) + + train_epoch_loss = torch.stack(train_losses).mean() + val_epoch_loss = torch.stack(val_losses).mean() + + ####################################################################################### + # Optional: Utility to print only one rank 0 (when using distributed setting ) # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # + ####################################################################################### + + return model.state_dict() + + +seed_everything(42) +model = BoringModel() +lite = LiteTrainer() +lite_model_weights = lite.run(model, train_dataloader(), val_dataloader()) + +############################################################################################# +# Assert the weights are the same # +############################################################################################# + +for pure_w, lite_w in zip(pure_model_weights.values(), lite_model_weights.values()): + torch.equal(pure_w, lite_w) + + +############################################################################################# +# Convert to Lightning # +# # +# By converting to Lightning, non-only your research code becomes inter-operable # +# (can easily be shared), but you get access to hundreds of extra features to make your # +# research faster. # +############################################################################################# + +from pytorch_lightning import LightningDataModule, LightningModule, Trainer # noqa E402 + + +class LightningBoringModel(LightningModule, BoringModel): + def training_step(self, batch, batch_idx): + x = self.forward(batch) + self.log("train_loss", x) + return x + + def validation_step(self, batch, batch_idx): + x = self.forward(batch) + self.log("val_loss", x) + return x + + def configure_optimizers(self): + return configure_optimizers(self) + + +class BoringDataModule(LightningDataModule): + def train_dataloader(self): + return train_dataloader() + + def val_dataloader(self): + return val_dataloader() + + +seed_everything(42) +lightning_module = LightningBoringModel() +datamodule = BoringDataModule() +trainer = Trainer(max_epochs=10) +trainer.fit(lightning_module, datamodule) + + +############################################################################################# +# Assert the weights are the same # +############################################################################################# + +for pure_w, lite_w in zip(pure_model_weights.values(), lightning_module.state_dict().values()): + torch.equal(pure_w, lite_w) diff --git a/pl_examples/lite_examples/simple/mnist_example.py b/pl_examples/lite_examples/simple/mnist_example.py index 1d9cb715c4137..afbf50c7f778a 100644 --- a/pl_examples/lite_examples/simple/mnist_example.py +++ b/pl_examples/lite_examples/simple/mnist_example.py @@ -1,3 +1,16 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import torch diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index c613d6744ab89..c226d0a271dcb 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -23,12 +23,12 @@ import torch.nn as nn from torch import Tensor from torch.optim import Optimizer -from torch.utils.data import DataLoader, DistributedSampler, RandomSampler, Sampler, SequentialSampler +from torch.utils.data import DataLoader, DistributedSampler, RandomSampler, SequentialSampler from pytorch_lightning import Trainer from pytorch_lightning.accelerators import Accelerator, TPUAccelerator from pytorch_lightning.lite.wrappers import _LiteDataLoader, _LiteModule, _LiteOptimizer -from pytorch_lightning.plugins import DDPSpawnPlugin, DeepSpeedPlugin, PLUGIN_INPUT, TrainingTypePlugin +from pytorch_lightning.plugins import DDPSpawnPlugin, PLUGIN_INPUT, TrainingTypePlugin from pytorch_lightning.trainer.connectors.accelerator_connector import AcceleratorConnector from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin from pytorch_lightning.utilities import DeviceType, DistributedType, move_data_to_device @@ -129,7 +129,7 @@ def world_size(self) -> int: return getattr(self._strategy, "world_size", 1) @abstractmethod - def run(self, *args: Any, **kwargs: Any) -> None: + def run(self, *args: Any, **kwargs: Any) -> Any: """All the code inside this run method gets accelerated by Lite. Args: @@ -300,8 +300,7 @@ def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> None: if isinstance(self._strategy, DDPSpawnPlugin): self._strategy.spawn(run_method, *args, **kwargs) else: - run_method(*args, **kwargs) - # TODO: any teardown needed here? + return run_method(*args, **kwargs) def _setup_model_and_optimizers( self, diff --git a/pytorch_lightning/lite/wrappers.py b/pytorch_lightning/lite/wrappers.py index ad2804469428b..60d9ccc7c1e2e 100644 --- a/pytorch_lightning/lite/wrappers.py +++ b/pytorch_lightning/lite/wrappers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, Optional +from typing import Any, Callable, Generator, Iterator, Optional, Union import torch from torch import nn as nn @@ -89,7 +89,7 @@ def __init__(self, device: Optional[torch.device] = None, **dl_kwargs: Any) -> N super().__init__(**dl_kwargs) self._device = device - def __iter__(self) -> Union[Iterator[Any], Generator[Any, None, None]] + def __iter__(self) -> Union[Iterator[Any], Generator[Any, None, None]]: iterator = super().__iter__() if self._device is None: return iterator From 617f63877111402f94845ccc731eb870ff76d872 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 14:10:03 +0100 Subject: [PATCH 02/14] update --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 694ff729fc6aa..5dfa6c0f406bd 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -137,7 +137,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # and `optimizer`. If you have multiple models (c.f GAN), # # call `setup` for each one of them and their associated # # optimizers # - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) # ################################################################## for epoch in range(num_epochs): From c303bee9bf3b11f1b9ce9eed176cf7dadb7116ac Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 14:33:50 +0100 Subject: [PATCH 03/14] update --- .../pytorch_2_lite_2_lightning.py | 23 ++++++++++++++----- pytorch_lightning/lite/lite.py | 2 +- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 5dfa6c0f406bd..ae74eebfc65ca 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -167,19 +167,17 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # ####################################################################################### - return model.state_dict() - seed_everything(42) -model = BoringModel() +lite_model = BoringModel() lite = LiteTrainer() -lite_model_weights = lite.run(model, train_dataloader(), val_dataloader()) +lite.run(lite_model, train_dataloader(), val_dataloader()) ############################################################################################# # Assert the weights are the same # ############################################################################################# -for pure_w, lite_w in zip(pure_model_weights.values(), lite_model_weights.values()): +for pure_w, lite_w in zip(pure_model_weights.values(), lite_model.state_dict().values()): torch.equal(pure_w, lite_w) @@ -194,7 +192,18 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da from pytorch_lightning import LightningDataModule, LightningModule, Trainer # noqa E402 -class LightningBoringModel(LightningModule, BoringModel): +class LightningBoringModel(LightningModule): + def __init__(self): + super().__init__() + self.layer = torch.nn.Linear(32, 2) + + def forward(self, x): + x = self.layer(x) + return 
torch.nn.functional.mse_loss(x, torch.ones_like(x)) + + ############################################################################################# + # LightningModule hooks # + # def training_step(self, batch, batch_idx): x = self.forward(batch) self.log("train_loss", x) @@ -208,6 +217,8 @@ def validation_step(self, batch, batch_idx): def configure_optimizers(self): return configure_optimizers(self) + ############################################################################################# + class BoringDataModule(LightningDataModule): def train_dataloader(self): diff --git a/pytorch_lightning/lite/lite.py b/pytorch_lightning/lite/lite.py index c226d0a271dcb..4dbf530d14f3f 100644 --- a/pytorch_lightning/lite/lite.py +++ b/pytorch_lightning/lite/lite.py @@ -300,7 +300,7 @@ def _run_impl(self, run_method: Callable, *args: Any, **kwargs: Any) -> None: if isinstance(self._strategy, DDPSpawnPlugin): self._strategy.spawn(run_method, *args, **kwargs) else: - return run_method(*args, **kwargs) + run_method(*args, **kwargs) def _setup_model_and_optimizers( self, From 13f46860c92828989c7d5cbb9eb77bae8fa4619c Mon Sep 17 00:00:00 2001 From: Kaushik B <45285388+kaushikb11@users.noreply.github.com> Date: Mon, 18 Oct 2021 19:13:01 +0530 Subject: [PATCH 04/14] Update pl_examples/lite_examples/pytorch_2_lite_2_lightning.py --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index ae74eebfc65ca..0b95fb6838602 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -162,7 +162,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da val_epoch_loss = torch.stack(val_losses).mean() ####################################################################################### - # Optional: Utility to print only one rank 0 (when using distributed setting ) # + # Optional: Utility to print only on rank 0 (when using distributed setting ) # self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # ####################################################################################### From cbcf7b5b6947deba98cc40c3d945ee0315b7edf2 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 15:22:42 +0100 Subject: [PATCH 05/14] update on comments --- .../pytorch_2_lite_2_lightning.py | 72 +++++++++---------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index ae74eebfc65ca..3da638582f4d0 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -24,13 +24,13 @@ # What is LightningLite ? # # # # `LightningLite` is a python class you can override to get access to Lightning # -# accelerators and scale your training, but furthermore, it is intentend to be the safe # +# accelerators and scale your training, but furthermore, it is intended to be the safest # # route to fully transition to Lightning. # # # # Does LightningLite requires code changes ? # # # -# `LightningLite` code changes are minimal and this tutorial will show you easy it is to # -# convert using a BoringModel to `LightningLite`. 
# +# `LightningLite` code changes are minimal and this tutorial will show you how easy it # +# is to convert to `lite` using a `BoringModel`. # # # ############################################################################################# @@ -39,7 +39,7 @@ ############################################################################################# -# 1 / 6: Implement a BoringModel with only one layer. +# 1 / 6: Implement a `BoringModel` with only one layer. class BoringModel(nn.Module): def __init__(self): super().__init__() @@ -50,16 +50,12 @@ def forward(self, x): return torch.nn.functional.mse_loss(x, torch.ones_like(x)) -# 2 / 6: Implement a `configure_optimizers` taking a and returning an optimizer - - +# 2 / 6: Implement a `configure_optimizers` taking a module and returning an optimizer. def configure_optimizers(module: nn.Module): return torch.optim.SGD(module.parameters(), lr=0.001) -# 3 / 6: Implement a simple dataset returning random data with the specificed shape - - +# 3 / 6: Implement a simple dataset returning random data with the specified shape. class RandomDataset(Dataset): def __init__(self, length: int, size: int): self.len = length @@ -73,8 +69,6 @@ def __len__(self): # 4 / 6: Implement the functions to create the dataloaders. - - def train_dataloader(): return DataLoader(RandomDataset(64, 32)) @@ -84,8 +78,6 @@ def val_dataloader(): # 5 / 6: Our main PyTorch Loop to train our `BoringModel` on our random data. - - def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoader, num_epochs: int = 10): optimizer = configure_optimizers(model) @@ -93,6 +85,7 @@ def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoa train_losses = [] val_losses = [] + model.train() for batch in train_dataloader: optimizer.zero_grad() loss = model(batch) @@ -100,8 +93,10 @@ def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoa loss.backward() optimizer.step() - for batch in val_dataloader: - val_losses.append(model(batch)) + model.eval() + with torch.no_grad(): + for batch in val_dataloader: + val_losses.append(model(batch)) train_epoch_loss = torch.stack(train_losses).mean() val_epoch_loss = torch.stack(val_losses).mean() @@ -121,7 +116,7 @@ def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoa ############################################################################################# # Convert to LightningLite # # # -# By converting the `LightningLite`, you get the full power of Lightning accelerators # +# By converting to `LightningLite`, you get the full power of Lightning accelerators # # while conversing your original code ! # # To get started, you would need to `from pytorch_lightning.lite import LightningLite` # # and override its `run` method. # @@ -137,13 +132,14 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # and `optimizer`. 
If you have multiple models (c.f GAN), # # call `setup` for each one of them and their associated # # optimizers # - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) # ################################################################## for epoch in range(num_epochs): train_losses = [] val_losses = [] + model.train() for batch in train_dataloader: optimizer.zero_grad() loss = model(batch) @@ -151,20 +147,22 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ################################################################## # By calling `self.backward` directly, `LightningLite` will # # automate precision and distributions. # - self.backward(loss) # # + self.backward(loss) # ################################################################## optimizer.step() - for batch in val_dataloader: - val_losses.append(model(batch)) + model.eval() + with torch.no_grad(): + for batch in val_dataloader: + val_losses.append(model(batch)) train_epoch_loss = torch.stack(train_losses).mean() val_epoch_loss = torch.stack(val_losses).mean() ####################################################################################### # Optional: Utility to print only one rank 0 (when using distributed setting ) # - self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # - self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # ####################################################################################### @@ -203,20 +201,20 @@ def forward(self, x): ############################################################################################# # LightningModule hooks # - # - def training_step(self, batch, batch_idx): - x = self.forward(batch) - self.log("train_loss", x) - return x - - def validation_step(self, batch, batch_idx): - x = self.forward(batch) - self.log("val_loss", x) - return x - - def configure_optimizers(self): - return configure_optimizers(self) - + # # + def training_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("train_loss", x) # + return x # + # + def validation_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("val_loss", x) # + return x # + # + def configure_optimizers(self): # + return configure_optimizers(self) # + # # ############################################################################################# From a20fcc19aca4b4aac7a718be083abdc2432d0d96 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Oct 2021 14:24:29 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../pytorch_2_lite_2_lightning.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 3da638582f4d0..659b2d8aa3000 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -132,7 +132,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # and `optimizer`. 
If you have multiple models (c.f GAN), # # call `setup` for each one of them and their associated # # optimizers # - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) # ################################################################## for epoch in range(num_epochs): @@ -147,7 +147,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ################################################################## # By calling `self.backward` directly, `LightningLite` will # # automate precision and distributions. # - self.backward(loss) # + self.backward(loss) # ################################################################## optimizer.step() @@ -161,8 +161,8 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ####################################################################################### # Optional: Utility to print only one rank 0 (when using distributed setting ) # - self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # - self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # ####################################################################################### @@ -202,18 +202,21 @@ def forward(self, x): ############################################################################################# # LightningModule hooks # # # - def training_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("train_loss", x) # - return x # - # - def validation_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("val_loss", x) # - return x # - # - def configure_optimizers(self): # - return configure_optimizers(self) # + def training_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("train_loss", x) # + return x # + # + + def validation_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("val_loss", x) # + return x # + # + + def configure_optimizers(self): # + return configure_optimizers(self) # + # # ############################################################################################# From 1618c1f4f46d7a21899d3ee33292fed4455c24d0 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 15:42:47 +0100 Subject: [PATCH 07/14] typo --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 3da638582f4d0..76e4066d338af 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -116,7 +116,7 @@ def main(model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoa ############################################################################################# # Convert to LightningLite # # # -# By converting to `LightningLite`, you get the full power of Lightning accelerators # +# By converting to `LightningLite`, you get the full power of Lightning accelerators # # while conversing your original code ! # # To get started, you would need to `from pytorch_lightning.lite import LightningLite` # # and override its `run` method. 
# From 9461d1b10842964e9f4b5d1dac7ce2bbf4ac346e Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 15:47:50 +0100 Subject: [PATCH 08/14] update --- .../pytorch_2_lite_2_lightning.py | 70 +++++++++---------- 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 055669206891e..ce96e2e0175a8 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -127,13 +127,12 @@ class LiteTrainer(LightningLite): def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: DataLoader, num_epochs: int = 10): optimizer = configure_optimizers(model) - ################################################################## - # You would need to call `self.setup` to wrap `model` # - # and `optimizer`. If you have multiple models (c.f GAN), # - # call `setup` for each one of them and their associated # - # optimizers # - model, optimizer = self.setup(model=model, optimizers=optimizer) # - ################################################################## + ################################################################################### + # You would need to call `self.setup` to wrap `model` and `optimizer`. If you # + # have multiple models (c.f GAN), call `setup` for each one of them and their # + # associated optimizers. # + model, optimizer = self.setup(model=model, optimizers=optimizer) # + ################################################################################### for epoch in range(num_epochs): train_losses = [] @@ -144,11 +143,11 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da optimizer.zero_grad() loss = model(batch) train_losses.append(loss) - ################################################################## - # By calling `self.backward` directly, `LightningLite` will # - # automate precision and distributions. # - self.backward(loss) # - ################################################################## + ########################################################################### + # By calling `self.backward` directly, `LightningLite` will automate # + # precision and distributions. 
# + self.backward(loss) # + ########################################################################### optimizer.step() model.eval() @@ -159,11 +158,11 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da train_epoch_loss = torch.stack(train_losses).mean() val_epoch_loss = torch.stack(val_losses).mean() - ####################################################################################### - # Optional: Utility to print only one rank 0 (when using distributed setting ) # - self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {torch.mean(train_epoch_loss)}") # - self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {torch.mean(val_epoch_loss)}") # - ####################################################################################### + ################################################################################ + # Optional: Utility to print only one rank 0 (when using distributed setting) # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {train_epoch_loss}") # + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {val_epoch_loss}") # + ################################################################################ seed_everything(42) @@ -199,26 +198,23 @@ def forward(self, x): x = self.layer(x) return torch.nn.functional.mse_loss(x, torch.ones_like(x)) - ############################################################################################# - # LightningModule hooks # - # # - def training_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("train_loss", x) # - return x # - # - - def validation_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("val_loss", x) # - return x # - # - - def configure_optimizers(self): # - return configure_optimizers(self) # - - # # - ############################################################################################# + ######################################################################################### + # LightningModule hooks # + # # + def training_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("train_loss", x) # + return x # + # + def validation_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("val_loss", x) # + return x # + # + def configure_optimizers(self): # + return configure_optimizers(self) # + # # + ######################################################################################### class BoringDataModule(LightningDataModule): From 6d88c4cc2b5e616df3a5199b574f0df3a5596a3b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Oct 2021 14:49:18 +0000 Subject: [PATCH 09/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../pytorch_2_lite_2_lightning.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index ce96e2e0175a8..cdc26d752602f 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -131,7 +131,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # You would need to call `self.setup` to wrap `model` and `optimizer`. If you # # have multiple models (c.f GAN), call `setup` for each one of them and their # # associated optimizers. 
# - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) # ################################################################################### for epoch in range(num_epochs): @@ -146,7 +146,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ########################################################################### # By calling `self.backward` directly, `LightningLite` will automate # # precision and distributions. # - self.backward(loss) # + self.backward(loss) # ########################################################################### optimizer.step() @@ -160,8 +160,8 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ################################################################################ # Optional: Utility to print only one rank 0 (when using distributed setting) # - self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {train_epoch_loss}") # - self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {val_epoch_loss}") # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {train_epoch_loss}") # + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {val_epoch_loss}") # ################################################################################ @@ -201,18 +201,21 @@ def forward(self, x): ######################################################################################### # LightningModule hooks # # # - def training_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("train_loss", x) # - return x # - # - def validation_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("val_loss", x) # - return x # - # - def configure_optimizers(self): # - return configure_optimizers(self) # + def training_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("train_loss", x) # + return x # + # + + def validation_step(self, batch, batch_idx): # + x = self.forward(batch) # + self.log("val_loss", x) # + return x # + # + + def configure_optimizers(self): # + return configure_optimizers(self) # + # # ######################################################################################### From cbd4bafdc0d53df8ec592e57a1efd40eb658375b Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 16:39:40 +0100 Subject: [PATCH 10/14] update --- .../pytorch_2_lite_2_lightning.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index ce96e2e0175a8..058aeca0054c8 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -131,7 +131,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # You would need to call `self.setup` to wrap `model` and `optimizer`. If you # # have multiple models (c.f GAN), call `setup` for each one of them and their # # associated optimizers. 
# - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) # ################################################################################### for epoch in range(num_epochs): @@ -146,7 +146,7 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ########################################################################### # By calling `self.backward` directly, `LightningLite` will automate # # precision and distributions. # - self.backward(loss) # + self.backward(loss) ########################################################################### optimizer.step() @@ -159,9 +159,9 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da val_epoch_loss = torch.stack(val_losses).mean() ################################################################################ - # Optional: Utility to print only one rank 0 (when using distributed setting) # - self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {train_epoch_loss}") # - self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {val_epoch_loss}") # + # Optional: Utility to print only on rank 0 (when using distributed setting) # + self.print(f"{epoch}/{num_epochs}| Train Epoch Loss: {train_epoch_loss}") + self.print(f"{epoch}/{num_epochs}| Valid Epoch Loss: {val_epoch_loss}") ################################################################################ @@ -201,18 +201,19 @@ def forward(self, x): ######################################################################################### # LightningModule hooks # # # - def training_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("train_loss", x) # - return x # - # - def validation_step(self, batch, batch_idx): # - x = self.forward(batch) # - self.log("val_loss", x) # - return x # - # - def configure_optimizers(self): # - return configure_optimizers(self) # + def training_step(self, batch, batch_idx): + x = self.forward(batch) + self.log("train_loss", x) + return x + + def validation_step(self, batch, batch_idx): + x = self.forward(batch) + self.log("val_loss", x) + return x + + def configure_optimizers(self): + return configure_optimizers(self) + # # ######################################################################################### From 26de6927d8fd7acd896dd0ca806bb4eb0359f398 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 16:39:46 +0100 Subject: [PATCH 11/14] update --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 058aeca0054c8..136d9d69e865b 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -198,9 +198,7 @@ def forward(self, x): x = self.layer(x) return torch.nn.functional.mse_loss(x, torch.ones_like(x)) - ######################################################################################### - # LightningModule hooks # - # # + # LightningModule hooks def training_step(self, batch, batch_idx): x = self.forward(batch) self.log("train_loss", x) @@ -214,9 +212,6 @@ def validation_step(self, batch, batch_idx): def configure_optimizers(self): return configure_optimizers(self) - # # - ######################################################################################### - class BoringDataModule(LightningDataModule): def 
train_dataloader(self): From e821d95640a1f86507e4a4abe1767ccab78bbcbd Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 16:43:13 +0100 Subject: [PATCH 12/14] update --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 136d9d69e865b..0106a019aa98f 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -181,9 +181,12 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da ############################################################################################# # Convert to Lightning # # # -# By converting to Lightning, non-only your research code becomes inter-operable # +# By converting to Lightning, not-only your research code becomes inter-operable # # (can easily be shared), but you get access to hundreds of extra features to make your # # research faster. # +# Check Facebook blogpost about `Lightning` enabled them to scale their research in # +# production: https://ai.facebook.com/blog # +# /reengineering-facebook-ais-deep-learning-platforms-for-interoperability/ # ############################################################################################# from pytorch_lightning import LightningDataModule, LightningModule, Trainer # noqa E402 From 20d7ab66d082958d267d76e675a4227fd66f124b Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Oct 2021 16:44:09 +0100 Subject: [PATCH 13/14] update --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 0106a019aa98f..147451f36d23d 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -184,8 +184,8 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # By converting to Lightning, not-only your research code becomes inter-operable # # (can easily be shared), but you get access to hundreds of extra features to make your # # research faster. # -# Check Facebook blogpost about `Lightning` enabled them to scale their research in # -# production: https://ai.facebook.com/blog # +# Check `Facebook` blogpost on how `Lightning` enabled their research to scale at scale # +# On https://ai.facebook.com/blog # # /reengineering-facebook-ais-deep-learning-platforms-for-interoperability/ # ############################################################################################# From d8a22c43feda0c6e88807b68d90c48a457b09a7b Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Oct 2021 09:53:16 +0100 Subject: [PATCH 14/14] update --- pl_examples/lite_examples/pytorch_2_lite_2_lightning.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py index 147451f36d23d..511031697dc8a 100644 --- a/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py +++ b/pl_examples/lite_examples/pytorch_2_lite_2_lightning.py @@ -131,7 +131,14 @@ def run(self, model: nn.Module, train_dataloader: DataLoader, val_dataloader: Da # You would need to call `self.setup` to wrap `model` and `optimizer`. 
If you # # have multiple models (c.f GAN), call `setup` for each one of them and their # # associated optimizers. # - model, optimizer = self.setup(model=model, optimizers=optimizer) # + model, optimizer = self.setup(model=model, optimizers=optimizer) + ################################################################################### + + ################################################################################### + # You would need to call `self.setup_dataloaders` to prepare the dataloaders # + # in case you are running in a distributed setting. # + train_dataloader = self.setup_dataloaders(train_dataloader) + val_dataloader = self.setup_dataloaders(val_dataloader) ################################################################################### for epoch in range(num_epochs):
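
Taken together, the patches above converge on one conversion recipe: wrap the model/optimizer pair with `self.setup`, wrap the dataloaders with `self.setup_dataloaders`, replace `loss.backward()` with `self.backward(loss)`, and use `self.print` for rank-zero output. The sketch below condenses that recipe into a minimal standalone script. It is an illustration only: it reuses the toy model and random data from the example and assumes the in-progress `LightningLite` API exactly as exercised in these diffs (including the `optimizers=` keyword of `setup`), not necessarily a final released interface.

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

from pytorch_lightning import seed_everything
from pytorch_lightning.lite import LightningLite


class RandomDataset(Dataset):
    """Random data with a fixed feature size, mirroring the example above."""

    def __init__(self, length: int = 64, size: int = 32):
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)


class BoringModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(32, 2)

    def forward(self, x):
        x = self.layer(x)
        return torch.nn.functional.mse_loss(x, torch.ones_like(x))


class LiteTrainer(LightningLite):
    def run(self, num_epochs: int = 10):
        model = BoringModel()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

        # Wrap the model and its optimizer so device placement and precision are handled
        # by Lite. The `optimizers=` keyword matches the API used in the diffs above.
        model, optimizer = self.setup(model=model, optimizers=optimizer)

        # Wrap the dataloader so a distributed sampler is injected when running distributed.
        train_dataloader = self.setup_dataloaders(DataLoader(RandomDataset()))

        for epoch in range(num_epochs):
            model.train()
            losses = []
            for batch in train_dataloader:
                optimizer.zero_grad()
                loss = model(batch)
                # `self.backward` replaces `loss.backward()` and applies precision scaling.
                self.backward(loss)
                optimizer.step()
                losses.append(loss.detach())

            # `self.print` only prints on rank 0 in a distributed setting.
            self.print(f"{epoch}/{num_epochs} | train loss: {torch.stack(losses).mean()}")


if __name__ == "__main__":
    seed_everything(42)
    LiteTrainer().run()

A validation loop is wrapped the same way: a second call to `self.setup_dataloaders` for the validation dataloader, plus `model.eval()` and `torch.no_grad()` around the forward passes, as the later patches in this series do.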