From 6d75c086a582af3c5738ac5e811ac9acabc6a71d Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Thu, 18 Feb 2021 16:24:11 +0000
Subject: [PATCH 1/6] Add warnings to hooks

---
 docs/source/extensions/datamodules.rst | 6 ++++++
 pytorch_lightning/core/hooks.py        | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index 8a6a85eb4bb70..8b5f408830a40 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -319,6 +319,9 @@ Override to alter or apply augmentations to your batch before it is transferred
             return batch
 
 
+.. warning::
+    The hook signature will change once the dataloader_idx is supported as an argument.
+
 .. note:: This hook only runs on single GPU training and DDP (no data-parallel).
 
 
@@ -334,6 +337,9 @@ Override to alter or apply augmentations to your batch after it is transferred t
             return batch
 
 
+.. warning::
+    The hook signature will change once the dataloader_idx is supported as an argument.
+
 .. note::
     This hook only runs on single GPU training and DDP (no data-parallel). This hook
     will also be called when using CPU device, so adding augmentations here or in
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 05fc9e9ec3cee..962e9e3ae3188 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -620,6 +620,8 @@ def on_before_batch_transfer(self, batch):
         """
         Override to alter or apply batch augmentations to your batch before it is transferred to the device.
 
+        .. warning:: The hook signature will change once the dataloader_idx is supported as an argument.
+
         Note:
             This hook only runs on single GPU training and DDP (no data-parallel).
 
@@ -645,6 +647,8 @@ def on_after_batch_transfer(self, batch):
         """
         Override to alter or apply batch augmentations to your batch after it is transferred to the device.
 
+        .. warning:: The hook signature will change once the dataloader_idx is supported as an argument.
+
         Note:
             This hook only runs on single GPU training and DDP (no data-parallel).
 

From ac961d77e37ec595299685bdfa3af95e21eb21ab Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Thu, 18 Feb 2021 16:54:14 +0000
Subject: [PATCH 2/6] Add default idx to prevent signature change in the future

---
 docs/source/extensions/datamodules.rst             |  8 ++++----
 pytorch_lightning/core/hooks.py                    | 14 ++++++++------
 pytorch_lightning/core/lightning.py                |  6 +++---
 .../plugins/training_type/deepspeed.py             |  8 +++++++-
 tests/core/test_datamodules.py                     |  4 ++--
 tests/models/test_hooks.py                         |  4 ++--
 6 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index 8b5f408830a40..615158d967f52 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -314,13 +314,13 @@ Override to alter or apply augmentations to your batch before it is transferred
 .. testcode::
 
     class MNISTDataModule(LightningDataModule):
-        def on_before_batch_transfer(self, batch):
+        def on_before_batch_transfer(self, batch, dataloader_idx):
             batch['x'] = transforms(batch['x'])
             return batch
 
 
 .. warning::
-    The hook signature will change once the dataloader_idx is supported as an argument.
+    Currently dataloader_idx always returns 0 and will be updated to support the true idx in the future.
 
 .. note:: This hook only runs on single GPU training and DDP (no data-parallel).
 
@@ -332,13 +332,13 @@ Override to alter or apply augmentations to your batch after it is transferred t
 .. testcode::
 
     class MNISTDataModule(LightningDataModule):
-        def on_after_batch_transfer(self, batch):
+        def on_after_batch_transfer(self, batch, dataloader_idx):
             batch['x'] = gpu_transforms(batch['x'])
             return batch
 
 
 .. warning::
-    The hook signature will change once the dataloader_idx is supported as an argument.
+    Currently dataloader_idx always returns 0 and will be updated to support the true idx in the future.
 
 .. note::
     This hook only runs on single GPU training and DDP (no data-parallel). This hook
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 962e9e3ae3188..2554c839176ff 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -616,24 +616,25 @@ def transfer_batch_to_device(self, batch, device):
         device = device or self.device
         return move_data_to_device(batch, device)
 
-    def on_before_batch_transfer(self, batch):
+    def on_before_batch_transfer(self, batch, dataloader_idx):
         """
         Override to alter or apply batch augmentations to your batch before it is transferred to the device.
 
-        .. warning:: The hook signature will change once the dataloader_idx is supported as an argument.
+        .. warning:: dataloader_idx always returns 0, and will be updated to support the true idx in the future.
 
         Note:
             This hook only runs on single GPU training and DDP (no data-parallel).
 
         Args:
             batch: A batch of data that needs to be altered or augmented.
+            dataloader_idx: DataLoader idx for batch (Default: 0)
 
         Returns:
             A batch of data
 
         Example::
 
-            def on_before_batch_transfer(self, batch):
+            def on_before_batch_transfer(self, batch, dataloader_idx):
                 batch['x'] = transforms(batch['x'])
                 return batch
 
@@ -643,24 +644,25 @@ def on_before_batch_transfer(self, batch):
         """
         return batch
 
-    def on_after_batch_transfer(self, batch):
+    def on_after_batch_transfer(self, batch, dataloader_idx):
         """
         Override to alter or apply batch augmentations to your batch after it is transferred to the device.
 
-        .. warning:: The hook signature will change once the dataloader_idx is supported as an argument.
+        .. warning:: dataloader_idx always returns 0, and will be updated to support the true idx in the future.
 
         Note:
             This hook only runs on single GPU training and DDP (no data-parallel).
 
         Args:
             batch: A batch of data that needs to be altered or augmented.
+            dataloader_idx: DataLoader idx for batch (Default: 0)
 
         Returns:
             A batch of data
 
         Example::
 
-            def on_after_batch_transfer(self, batch):
+            def on_after_batch_transfer(self, batch, dataloader_idx):
                 batch['x'] = gpu_transforms(batch['x'])
                 return batch
 
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 91d2b3565d193..3f6dd3fb0d110 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -179,10 +179,10 @@ def logger(self):
         """ Reference to the logger object in the Trainer. """
         return self.trainer.logger if self.trainer else None
 
-    def _apply_batch_transfer_handler(self, batch: Any, device: Optional[torch.device] = None):
-        batch = self.on_before_batch_transfer(batch)
+    def _apply_batch_transfer_handler(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0):
+        batch = self.on_before_batch_transfer(batch, dataloader_idx)
         batch = self.transfer_batch_to_device(batch, device)
-        batch = self.on_after_batch_transfer(batch)
+        batch = self.on_after_batch_transfer(batch, dataloader_idx)
         return batch
 
     def print(self, *args, **kwargs) -> None:
diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index 69fdb4c19a4b6..c352d589759cb 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -305,7 +305,13 @@ def _format_precision_config(self):
         precision = self.lightning_module.trainer.accelerator_connector.precision
         if precision == 16:
             if "amp" not in self.config and amp_type == AMPType.NATIVE:
-                self.config["fp16"] = {"enabled": True}
+                self.config["fp16"] = {
+                    "enabled": True,
+                    "loss_scale": 0,
+                    "loss_scale_window": 1000,
+                    "hysteresis": 2,
+                    "min_loss_scale": 1
+                }
             elif "apex" not in self.config and amp_type == AMPType.APEX:
                 self.config["amp"] = {
                     "enabled": True,
diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index e2f3b559073d2..4e2e5d34c1963 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -438,13 +438,13 @@ class CurrentTestDM(LightningDataModule):
         on_before_batch_transfer_hook_rank = None
         on_after_batch_transfer_hook_rank = None
 
-        def on_before_batch_transfer(self, batch):
+        def on_before_batch_transfer(self, batch, dataloader_idx):
             self.on_before_batch_transfer_hook_rank = self.rank
             self.rank += 1
             batch.samples += 1
             return batch
 
-        def on_after_batch_transfer(self, batch):
+        def on_after_batch_transfer(self, batch, dataloader_idx):
             assert batch.samples.device == batch.targets.device == expected_device
             self.on_after_batch_transfer_hook_rank = self.rank
             self.rank += 1
diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py
index 7323996bcab3e..e92c01939e489 100644
--- a/tests/models/test_hooks.py
+++ b/tests/models/test_hooks.py
@@ -160,13 +160,13 @@ class CurrentTestModel(BoringModel):
         on_before_batch_transfer_hook_rank = None
         on_after_batch_transfer_hook_rank = None
 
-        def on_before_batch_transfer(self, batch):
+        def on_before_batch_transfer(self, batch, dataloader_idx):
             self.on_before_batch_transfer_hook_rank = self.rank
             self.rank += 1
             batch.samples += 1
             return batch
 
-        def on_after_batch_transfer(self, batch):
+        def on_after_batch_transfer(self, batch, dataloader_idx):
             assert batch.samples.device == batch.targets.device == expected_device
             self.on_after_batch_transfer_hook_rank = self.rank
             self.rank += 1

From 1f324a465673f67d13cd69414159cb90342d9af1 Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Thu, 18 Feb 2021 16:58:09 +0000
Subject: [PATCH 3/6] Nothing to see here

---
 pytorch_lightning/plugins/training_type/deepspeed.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/pytorch_lightning/plugins/training_type/deepspeed.py b/pytorch_lightning/plugins/training_type/deepspeed.py
index c352d589759cb..69fdb4c19a4b6 100644
--- a/pytorch_lightning/plugins/training_type/deepspeed.py
+++ b/pytorch_lightning/plugins/training_type/deepspeed.py
@@ -305,13 +305,7 @@ def _format_precision_config(self):
         precision = self.lightning_module.trainer.accelerator_connector.precision
         if precision == 16:
             if "amp" not in self.config and amp_type == AMPType.NATIVE:
-                self.config["fp16"] = {
-                    "enabled": True,
-                    "loss_scale": 0,
-                    "loss_scale_window": 1000,
-                    "hysteresis": 2,
-                    "min_loss_scale": 1
-                }
+                self.config["fp16"] = {"enabled": True}
             elif "apex" not in self.config and amp_type == AMPType.APEX:
                 self.config["amp"] = {
                     "enabled": True,

From 5c6a68f269814765db4c74931c2f2067ca7a3c9e Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Thu, 18 Feb 2021 17:26:53 +0000
Subject: [PATCH 4/6] Add default val to transfer_batch_to_device hook

---
 docs/source/extensions/datamodules.rst | 5 ++++-
 pytorch_lightning/core/hooks.py        | 9 ++++++---
 pytorch_lightning/core/lightning.py    | 2 +-
 tests/core/test_datamodules.py         | 2 +-
 tests/models/test_hooks.py             | 2 +-
 5 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index 615158d967f52..a1eb99091b447 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -297,13 +297,16 @@ Override to define how you want to move an arbitrary batch to a device.
 .. testcode::
 
     class MNISTDataModule(LightningDataModule):
-        def transfer_batch_to_device(self, batch, device):
+        def transfer_batch_to_device(self, batch, device, dataloader_idx):
             x = batch['x']
             x = CustomDataWrapper(x)
             batch['x'] = x.to(device)
             return batch
 
 
+.. warning::
+    Currently dataloader_idx always returns 0 and will be updated to support the true idx in the future.
+
 .. note:: This hook only runs on single GPU training and DDP (no data-parallel).
 
 
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 2554c839176ff..923ec4849d381 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -565,11 +565,13 @@ def predict_dataloader(self) -> Union[DataLoader, List[DataLoader]]:
             will have an argument ``dataloader_idx`` which matches the order here.
         """
 
-    def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None) -> Any:
+    def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx=0) -> Any:
         """
         Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors
         wrapped in a custom data structure.
 
+        .. warning:: dataloader_idx always returns 0, and will be updated to support the true idx in the future.
+
         The data types listed below (and any arbitrary nesting of them) are supported out of the box:
 
         - :class:`torch.Tensor` or anything that implements `.to(...)`
@@ -594,19 +596,20 @@ def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] =
         Args:
             batch: A batch of data that needs to be transferred to a new device.
             device: The target device as defined in PyTorch.
+            dataloader_idx: DataLoader idx for batch (Default: 0)
 
         Returns:
             A reference to the data on the new device.
 
         Example::
 
-            def transfer_batch_to_device(self, batch, device):
+            def transfer_batch_to_device(self, batch, device, dataloader_idx):
                 if isinstance(batch, CustomBatch):
                     # move all tensors in your custom data structure to the device
                     batch.samples = batch.samples.to(device)
                     batch.targets = batch.targets.to(device)
                 else:
-                    batch = super().transfer_batch_to_device(data, device)
+                    batch = super().transfer_batch_to_device(data, device, dataloader_idx)
                 return batch
 
         See Also:
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 3f6dd3fb0d110..472125e90fede 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -181,7 +181,7 @@ def logger(self):
 
     def _apply_batch_transfer_handler(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0):
         batch = self.on_before_batch_transfer(batch, dataloader_idx)
-        batch = self.transfer_batch_to_device(batch, device)
+        batch = self.transfer_batch_to_device(batch, device, dataloader_idx)
         batch = self.on_after_batch_transfer(batch, dataloader_idx)
         return batch
 
diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index 4e2e5d34c1963..dc961c8ab534f 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -451,7 +451,7 @@ def on_after_batch_transfer(self, batch, dataloader_idx):
             batch.targets *= 2
             return batch
 
-        def transfer_batch_to_device(self, batch, device):
+        def transfer_batch_to_device(self, batch, device, dataloader_idx):
             self.transfer_batch_to_device_hook_rank = self.rank
             self.rank += 1
             batch.samples = batch.samples.to(device)
diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py
index e92c01939e489..dc7bd947825d7 100644
--- a/tests/models/test_hooks.py
+++ b/tests/models/test_hooks.py
@@ -173,7 +173,7 @@ def on_after_batch_transfer(self, batch, dataloader_idx):
             batch.targets *= 2
             return batch
 
-        def transfer_batch_to_device(self, batch, device):
+        def transfer_batch_to_device(self, batch, device, dataloader_idx):
             self.transfer_batch_to_device_hook_rank = self.rank
             self.rank += 1
             batch.samples = batch.samples.to(device)

From 83330b95ba0f36512c84fe30253c0d1b1e15f558 Mon Sep 17 00:00:00 2001
From: Sean Naren <sean.narenthiran@gmail.com>
Date: Thu, 18 Feb 2021 17:37:20 +0000
Subject: [PATCH 5/6] Apply suggestions from code review

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
---
 docs/source/extensions/datamodules.rst | 6 ++++--
 pytorch_lightning/core/hooks.py        | 8 ++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index a1eb99091b447..6203e82e86bd5 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -305,7 +305,8 @@ Override to define how you want to move an arbitrary batch to a device.
 
 
 .. warning::
-    Currently dataloader_idx always returns 0 and will be updated to support the true idx in the future.
+
+    Currently ``dataloader_idx`` always returns 0 and will be updated to support the true ``idx`` in the future.
 
 .. note:: This hook only runs on single GPU training and DDP (no data-parallel).
 
@@ -341,7 +342,8 @@ Override to alter or apply augmentations to your batch after it is transferred t
 
 
 .. warning::
-    Currently dataloader_idx always returns 0 and will be updated to support the true idx in the future.
+
+    Currently ``dataloader_idx`` always returns 0 and will be updated to support the true ``idx`` in the future.
 
 .. note::
     This hook only runs on single GPU training and DDP (no data-parallel). This hook
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 923ec4849d381..2d3182700750a 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -570,7 +570,7 @@ def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] =
         Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors
         wrapped in a custom data structure.
 
-        .. warning:: dataloader_idx always returns 0, and will be updated to support the true idx in the future.
+        .. warning:: ``dataloader_idx`` always returns 0, and will be updated to support the true ``idx`` in the future.
 
         The data types listed below (and any arbitrary nesting of them) are supported out of the box:
 
@@ -596,7 +596,7 @@ def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] =
         Args:
             batch: A batch of data that needs to be transferred to a new device.
             device: The target device as defined in PyTorch.
-            dataloader_idx: DataLoader idx for batch (Default: 0)
+            dataloader_idx: DataLoader idx for batch
 
         Returns:
             A reference to the data on the new device.
@@ -630,7 +630,7 @@ def on_before_batch_transfer(self, batch, dataloader_idx):
 
         Args:
             batch: A batch of data that needs to be altered or augmented.
-            dataloader_idx: DataLoader idx for batch (Default: 0)
+            dataloader_idx: DataLoader idx for batch
 
         Returns:
             A batch of data
@@ -651,7 +651,7 @@ def on_after_batch_transfer(self, batch, dataloader_idx):
         """
         Override to alter or apply batch augmentations to your batch after it is transferred to the device.
 
-        .. warning:: dataloader_idx always returns 0, and will be updated to support the true idx in the future.
+        .. warning:: ``dataloader_idx`` always returns 0, and will be updated to support the true ``idx`` in the future.
 
         Note:
             This hook only runs on single GPU training and DDP (no data-parallel).

From 6cf848350b4d5fa1367fec970b88544752f0c25c Mon Sep 17 00:00:00 2001
From: SeanNaren <sean@grid.ai>
Date: Thu, 18 Feb 2021 19:22:56 +0000
Subject: [PATCH 6/6] Revert "Add default val to transfer_batch_to_device hook"

This reverts commit 5c6a68f2
---
 docs/source/extensions/datamodules.rst | 6 +-----
 pytorch_lightning/core/hooks.py        | 9 +++------
 pytorch_lightning/core/lightning.py    | 2 +-
 tests/core/test_datamodules.py         | 2 +-
 tests/models/test_hooks.py             | 2 +-
 5 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/docs/source/extensions/datamodules.rst b/docs/source/extensions/datamodules.rst
index 6203e82e86bd5..a6c083dc61fcf 100644
--- a/docs/source/extensions/datamodules.rst
+++ b/docs/source/extensions/datamodules.rst
@@ -297,17 +297,13 @@ Override to define how you want to move an arbitrary batch to a device.
 .. testcode::
 
     class MNISTDataModule(LightningDataModule):
-        def transfer_batch_to_device(self, batch, device, dataloader_idx):
+        def transfer_batch_to_device(self, batch, device):
             x = batch['x']
             x = CustomDataWrapper(x)
             batch['x'] = x.to(device)
             return batch
 
 
-.. warning::
-
-    Currently ``dataloader_idx`` always returns 0 and will be updated to support the true ``idx`` in the future.
-
 .. note:: This hook only runs on single GPU training and DDP (no data-parallel).
 
 
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py
index 2d3182700750a..e0b33c1219e8b 100644
--- a/pytorch_lightning/core/hooks.py
+++ b/pytorch_lightning/core/hooks.py
@@ -565,13 +565,11 @@ def predict_dataloader(self) -> Union[DataLoader, List[DataLoader]]:
             will have an argument ``dataloader_idx`` which matches the order here.
         """
 
-    def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx=0) -> Any:
+    def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None) -> Any:
         """
         Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors
         wrapped in a custom data structure.
 
-        .. warning:: ``dataloader_idx`` always returns 0, and will be updated to support the true ``idx`` in the future.
-
         The data types listed below (and any arbitrary nesting of them) are supported out of the box:
 
         - :class:`torch.Tensor` or anything that implements `.to(...)`
@@ -596,20 +594,19 @@ def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] =
         Args:
             batch: A batch of data that needs to be transferred to a new device.
             device: The target device as defined in PyTorch.
-            dataloader_idx: DataLoader idx for batch
 
         Returns:
             A reference to the data on the new device.
 
         Example::
 
-            def transfer_batch_to_device(self, batch, device, dataloader_idx):
+            def transfer_batch_to_device(self, batch, device):
                 if isinstance(batch, CustomBatch):
                     # move all tensors in your custom data structure to the device
                     batch.samples = batch.samples.to(device)
                     batch.targets = batch.targets.to(device)
                 else:
-                    batch = super().transfer_batch_to_device(data, device, dataloader_idx)
+                    batch = super().transfer_batch_to_device(data, device)
                 return batch
 
         See Also:
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 472125e90fede..3f6dd3fb0d110 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -181,7 +181,7 @@ def logger(self):
 
     def _apply_batch_transfer_handler(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0):
         batch = self.on_before_batch_transfer(batch, dataloader_idx)
-        batch = self.transfer_batch_to_device(batch, device, dataloader_idx)
+        batch = self.transfer_batch_to_device(batch, device)
         batch = self.on_after_batch_transfer(batch, dataloader_idx)
         return batch
 
diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index dc961c8ab534f..4e2e5d34c1963 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -451,7 +451,7 @@ def on_after_batch_transfer(self, batch, dataloader_idx):
             batch.targets *= 2
             return batch
 
-        def transfer_batch_to_device(self, batch, device, dataloader_idx):
+        def transfer_batch_to_device(self, batch, device):
             self.transfer_batch_to_device_hook_rank = self.rank
             self.rank += 1
             batch.samples = batch.samples.to(device)
diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py
index dc7bd947825d7..e92c01939e489 100644
--- a/tests/models/test_hooks.py
+++ b/tests/models/test_hooks.py
@@ -173,7 +173,7 @@ def on_after_batch_transfer(self, batch, dataloader_idx):
             batch.targets *= 2
             return batch
 
-        def transfer_batch_to_device(self, batch, device, dataloader_idx):
+        def transfer_batch_to_device(self, batch, device):
             self.transfer_batch_to_device_hook_rank = self.rank
             self.rank += 1
             batch.samples = batch.samples.to(device)