From c6a47dd35cc4f172476ac92e1f4f63153a4befcf Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Wed, 27 Jul 2022 13:14:21 +0100 Subject: [PATCH 01/18] Move half cast to occur on host instead of in fwd --- src/pytorch_lightning/strategies/ipu.py | 28 ++++++++------------ tests/tests_pytorch/accelerators/test_ipu.py | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 001ad77fbb5cc..14d3ce7a3d9c5 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -43,24 +43,9 @@ class LightningIPUModule(_LightningModuleWrapperBase): def __init__( - self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase], precision: Union[str, int] + self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase] ) -> None: super().__init__(pl_module) - self.precision = precision - - def forward(self, *inputs: Any, **kwargs: Any) -> Any: - if self.precision in (PrecisionType.MIXED, PrecisionType.HALF): - inputs = self._move_float_tensors_to_half(inputs) - - return super().forward(*inputs, **kwargs) - - @staticmethod - def batch_to(data: Tensor) -> Tensor: - return data.half() - - def _move_float_tensors_to_half(self, batch: Any) -> Any: - batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) - return batch class IPUStrategy(ParallelStrategy): @@ -142,7 +127,7 @@ def setup(self, trainer: "pl.Trainer") -> None: self._optimizer_zero_grad_original = self.lightning_module.optimizer_zero_grad self._disable_zero_grad() - model = LightningIPUModule(self.lightning_module, self.precision_plugin.precision) + model = LightningIPUModule(self.lightning_module) self.model = model # reset the backup @@ -272,6 +257,15 @@ def to_tensor(x): args = apply_to_collection(args, dtype=(int, float), function=to_tensor) return args + @staticmethod + def batch_to(data: Tensor) -> Tensor: + return data.half() + + def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0) -> Any: + if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): + batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) + return batch + def _disable_zero_grad(self) -> None: lightning_module = self.lightning_module if is_overridden("optimizer_zero_grad", lightning_module): diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index 589ec7b29dd5b..a60a8aa330a37 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -205,7 +205,7 @@ def setup(self, trainer: Trainer, pl_module: LightningModule, stage: Optional[st def test_pure_half_precision(tmpdir): class TestCallback(Callback): def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: - assert trainer.strategy.model.precision == 16 + assert trainer.strategy.precision_plugin.precision == 16 for param in trainer.strategy.model.parameters(): assert param.dtype == torch.float16 raise SystemExit From 435d03fa1ce1b7fd8b122a31f901296c621d112f Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Wed, 27 Jul 2022 14:28:04 +0100 Subject: [PATCH 02/18] Update changelog --- src/pytorch_lightning/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index f8341248b20e8..f7a0acd14e182 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -113,6 +113,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) + - `accelerator="gpu"` now automatically selects an available GPU backend (CUDA and MPS currently) ([#13642](https://github.com/Lightning-AI/lightning/pull/13642)) From 8379f0143bf6d5673aae8c83a16ffa94d1b02304 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Jul 2022 13:30:36 +0000 Subject: [PATCH 03/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 14d3ce7a3d9c5..417bc28baab2d 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -42,9 +42,7 @@ class LightningIPUModule(_LightningModuleWrapperBase): - def __init__( - self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase] - ) -> None: + def __init__(self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase]) -> None: super().__init__(pl_module) From 423d63ac6f10c703ce6ba56b0a993b7a040c86c1 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 09:47:39 +0100 Subject: [PATCH 04/18] batch_to -> to_half, now local to batch_to_device --- src/pytorch_lightning/strategies/ipu.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 417bc28baab2d..49a0daee79ca4 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -255,13 +255,11 @@ def to_tensor(x): args = apply_to_collection(args, dtype=(int, float), function=to_tensor) return args - @staticmethod - def batch_to(data: Tensor) -> Tensor: - return data.half() - def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0) -> Any: if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): - batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) + def to_half(data: Tensor) -> Tensor: + return data.half() + batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=to_half) return batch def _disable_zero_grad(self) -> None: From 28016af9a279cbf921b2467494b3d3b9d57fca30 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Jul 2022 08:49:37 +0000 Subject: [PATCH 05/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 49a0daee79ca4..5ba8c1aa48454 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -257,8 +257,10 @@ def to_tensor(x): def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0) -> Any: if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): + def to_half(data: Tensor) -> Tensor: return data.half() + batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=to_half) return batch From 2cd738db3f4682ec21918823bf195754405f5e80 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 10:00:24 +0100 Subject: [PATCH 06/18] Add comments to overloaded function --- src/pytorch_lightning/strategies/ipu.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 5ba8c1aa48454..6a32485997c10 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -256,12 +256,17 @@ def to_tensor(x): return args def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: int = 0) -> Any: + # This override is necessary because the cast must occur before the data + # is moved to the device to prevent wasteful host->device copies. if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): def to_half(data: Tensor) -> Tensor: return data.half() batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=to_half) + # We don't call `super().batch_to_device` because `data.to(device)` is not + # currently necessary for IPUs. The movement of data from host<->IPU is + # currently handled by PopTorch. return batch def _disable_zero_grad(self) -> None: From b9ca3d9656457b45c1f2f77066e9bdcf4145a2c0 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 10:13:11 +0100 Subject: [PATCH 07/18] Remove `LightningIPUModule` --- src/pytorch_lightning/strategies/ipu.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 6a32485997c10..54daa808369be 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -41,11 +41,6 @@ poptorch = None -class LightningIPUModule(_LightningModuleWrapperBase): - def __init__(self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase]) -> None: - super().__init__(pl_module) - - class IPUStrategy(ParallelStrategy): """Plugin for training on IPU devices.""" @@ -125,8 +120,7 @@ def setup(self, trainer: "pl.Trainer") -> None: self._optimizer_zero_grad_original = self.lightning_module.optimizer_zero_grad self._disable_zero_grad() - model = LightningIPUModule(self.lightning_module) - self.model = model + self.model = _LightningModuleWrapperBase(self.lightning_module) # reset the backup self.poptorch_models = {} @@ -139,22 +133,22 @@ def setup(self, trainer: "pl.Trainer") -> None: training_opts = self.training_opts inference_opts = self.inference_opts optimizer = self.lightning_module.trainer.optimizers[0] - model = poptorch.trainingModel(model=model, options=training_opts, optimizer=optimizer) + model = poptorch.trainingModel(model=self.model, options=training_opts, optimizer=optimizer) self.poptorch_models[RunningStage.TRAINING] = model if self.lightning_module.trainer.enable_validation: - model = poptorch.inferenceModel(model=model, options=inference_opts) + model = poptorch.inferenceModel(model=self.model, options=inference_opts) self.poptorch_models[RunningStage.VALIDATING] = model if self.lightning_module.trainer.num_sanity_val_steps > 0: self.poptorch_models[RunningStage.SANITY_CHECKING] = model elif trainer_fn == TrainerFn.VALIDATING: - model = poptorch.inferenceModel(model=model, options=self.inference_opts) + model = poptorch.inferenceModel(model=self.model, options=self.inference_opts) self.poptorch_models[RunningStage.VALIDATING] = model elif trainer_fn == TrainerFn.TESTING: - model = poptorch.inferenceModel(model=model, options=self.inference_opts) + model = poptorch.inferenceModel(model=self.model, options=self.inference_opts) self.poptorch_models[RunningStage.TESTING] = model elif trainer_fn == TrainerFn.PREDICTING: - model = poptorch.inferenceModel(model=model, options=self.inference_opts) + model = poptorch.inferenceModel(model=self.model, options=self.inference_opts) self.poptorch_models[RunningStage.PREDICTING] = model def setup_optimizers(self, trainer: "pl.Trainer") -> None: @@ -202,10 +196,6 @@ def inference_opts(self) -> "poptorch.Options": self._inference_opts = self._create_opts(training=False) return self._inference_opts - @property - def lightning_module(self) -> Optional["pl.LightningModule"]: - return self.model.module if isinstance(self.model, LightningIPUModule) else self.model - def _convert_to_poptorch_loader( self, dataloader: DataLoader, sampler, mode: Optional[RunningStage] = None ) -> "poptorch.DataLoader": From daf99497bb39f4074aca12376ea54d157f8172c3 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 11:13:56 +0100 Subject: [PATCH 08/18] Validate precision in IPU tests --- tests/tests_pytorch/accelerators/test_ipu.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index a60a8aa330a37..0908c87085a46 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -40,16 +40,19 @@ class IPUModel(BoringModel): def training_step(self, batch, batch_idx): + assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss def validation_step(self, batch, batch_idx): + assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss def test_step(self, batch, batch_idx): + assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss From 300056ee1750c1ac077eb5651814e937eccdc2d6 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 11:37:08 +0100 Subject: [PATCH 09/18] Deprecate `LightningIPUModel` --- src/pytorch_lightning/strategies/ipu.py | 24 +++++++++++++++++++ .../deprecated_api/test_remove_2-0.py | 6 +++++ 2 files changed, 30 insertions(+) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 54daa808369be..f10198be23939 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -33,6 +33,7 @@ from pytorch_lightning.utilities.enums import PrecisionType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.model_helpers import is_overridden +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation from pytorch_lightning.utilities.types import STEP_OUTPUT if _POPTORCH_AVAILABLE: @@ -41,6 +42,29 @@ poptorch = None +class LightningIPUModule(_LightningModuleWrapperBase): + def __init__( + self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase], precision: Union[str, int] + ) -> None: + rank_zero_deprecation('`LightningIPUModule` is deprecated in v1.8 and will be removed in v2.0.0') + super().__init__(pl_module) + self.precision = precision + + def forward(self, *inputs: Any, **kwargs: Any) -> Any: + if self.precision in (PrecisionType.MIXED, PrecisionType.HALF): + inputs = self._move_float_tensors_to_half(inputs) + + return super().forward(*inputs, **kwargs) + + @staticmethod + def batch_to(data: Tensor) -> Tensor: + return data.half() + + def _move_float_tensors_to_half(self, batch: Any) -> Any: + batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=self.batch_to) + return batch + + class IPUStrategy(ParallelStrategy): """Plugin for training on IPU devices.""" diff --git a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py index b39c6dafc1696..48954a5446cb0 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py @@ -19,6 +19,7 @@ import pytorch_lightning from pytorch_lightning import Trainer from pytorch_lightning.demos.boring_classes import BoringModel +from pytorch_lightning.strategies.ipu import LightningIPUModule from tests_pytorch.callbacks.test_callbacks import OldStatefulCallback from tests_pytorch.helpers.runif import RunIf @@ -49,6 +50,11 @@ def test_v2_0_0_deprecated_ipus(_, monkeypatch): with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v2.0."): _ = Trainer(ipus=4) +@mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) +def test_v2_0_0_deprecated_lightning_ipu_module(_, monkeypatch): + monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) + with pytest.deprecated_call(match=r"is deprecated in v1.8 and will be removed in v2.0."): + _ = LightningIPUModule(BoringModel(), 32) def test_v2_0_resume_from_checkpoint_trainer_constructor(tmpdir): # test resume_from_checkpoint still works until v2.0 deprecation From ee6a4cfc21b7c10b2aacc32ea1edb5365d052b30 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Jul 2022 10:38:48 +0000 Subject: [PATCH 10/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/pytorch_lightning/strategies/ipu.py | 2 +- tests/tests_pytorch/deprecated_api/test_remove_2-0.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index f10198be23939..a332d339a8daf 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -46,7 +46,7 @@ class LightningIPUModule(_LightningModuleWrapperBase): def __init__( self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase], precision: Union[str, int] ) -> None: - rank_zero_deprecation('`LightningIPUModule` is deprecated in v1.8 and will be removed in v2.0.0') + rank_zero_deprecation("`LightningIPUModule` is deprecated in v1.8 and will be removed in v2.0.0") super().__init__(pl_module) self.precision = precision diff --git a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py index 48954a5446cb0..82369a6412c7a 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py @@ -50,12 +50,14 @@ def test_v2_0_0_deprecated_ipus(_, monkeypatch): with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v2.0."): _ = Trainer(ipus=4) + @mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) def test_v2_0_0_deprecated_lightning_ipu_module(_, monkeypatch): monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) with pytest.deprecated_call(match=r"is deprecated in v1.8 and will be removed in v2.0."): _ = LightningIPUModule(BoringModel(), 32) + def test_v2_0_resume_from_checkpoint_trainer_constructor(tmpdir): # test resume_from_checkpoint still works until v2.0 deprecation model = BoringModel() From de82e00e278164f63daa5c1bfcc9ee16cc98cf3b Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 18:22:32 +0100 Subject: [PATCH 11/18] Update deprecation timeline --- src/pytorch_lightning/strategies/ipu.py | 2 +- tests/tests_pytorch/deprecated_api/test_remove_1-8.py | 8 ++++++++ tests/tests_pytorch/deprecated_api/test_remove_2-0.py | 8 -------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index a332d339a8daf..5abd829e84e0e 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -46,7 +46,7 @@ class LightningIPUModule(_LightningModuleWrapperBase): def __init__( self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase], precision: Union[str, int] ) -> None: - rank_zero_deprecation("`LightningIPUModule` is deprecated in v1.8 and will be removed in v2.0.0") + rank_zero_deprecation("`LightningIPUModule` is deprecated in v1.7 and will be removed in v1.8.0") super().__init__(pl_module) self.precision = precision diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py index 12aca123eacc1..f412d37730044 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py @@ -43,6 +43,7 @@ from pytorch_lightning.profiler import AbstractProfiler, BaseProfiler from pytorch_lightning.profilers import AdvancedProfiler, Profiler, SimpleProfiler from pytorch_lightning.strategies import DDP2Strategy, ParallelStrategy +from pytorch_lightning.strategies.ipu import LightningIPUModule from pytorch_lightning.trainer.configuration_validator import _check_datamodule_checkpoint_hooks from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities import device_parser @@ -1008,6 +1009,13 @@ def test_trainer_config_ipus(monkeypatch, trainer_kwargs, expected_ipus): trainer.ipus == expected_ipus +@mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) +def test_v2_0_0_deprecated_lightning_ipu_module(_, monkeypatch): + monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) + with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v1.8."): + _ = LightningIPUModule(BoringModel(), 32) + + @pytest.mark.parametrize( ["trainer_kwargs", "expected_num_processes"], [ diff --git a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py index 82369a6412c7a..b39c6dafc1696 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_2-0.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_2-0.py @@ -19,7 +19,6 @@ import pytorch_lightning from pytorch_lightning import Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.strategies.ipu import LightningIPUModule from tests_pytorch.callbacks.test_callbacks import OldStatefulCallback from tests_pytorch.helpers.runif import RunIf @@ -51,13 +50,6 @@ def test_v2_0_0_deprecated_ipus(_, monkeypatch): _ = Trainer(ipus=4) -@mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) -def test_v2_0_0_deprecated_lightning_ipu_module(_, monkeypatch): - monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) - with pytest.deprecated_call(match=r"is deprecated in v1.8 and will be removed in v2.0."): - _ = LightningIPUModule(BoringModel(), 32) - - def test_v2_0_resume_from_checkpoint_trainer_constructor(tmpdir): # test resume_from_checkpoint still works until v2.0 deprecation model = BoringModel() From bb45874a5d4aa15ac8b8d370a0aeb97ca170bb71 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 18:24:24 +0100 Subject: [PATCH 12/18] Moved changelog entry to fixed --- src/pytorch_lightning/CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index f7a0acd14e182..dae416f4ecf11 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -113,8 +113,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) - - `accelerator="gpu"` now automatically selects an available GPU backend (CUDA and MPS currently) ([#13642](https://github.com/Lightning-AI/lightning/pull/13642)) @@ -417,6 +415,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) - Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885)) - Fixed mismatching default values for the types of some arguments in the DeepSpeed and Fully-Sharded strategies which made the CLI unable to use them ([#12989](https://github.com/PyTorchLightning/pytorch-lightning/pull/12989)) - Avoid redundant callback restore warning while tuning ([#13026](https://github.com/PyTorchLightning/pytorch-lightning/pull/13026)) From 5ffbec0a51872a1d935faee56c3968a1be8ae12f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Thu, 28 Jul 2022 19:28:06 +0200 Subject: [PATCH 13/18] Minor fixes --- src/pytorch_lightning/CHANGELOG.md | 4 +++- tests/tests_pytorch/deprecated_api/test_remove_1-8.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index dae416f4ecf11..47d56c454648d 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -385,6 +385,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Used `global_step` while restoring logging step for old checkpoints ([#13645](https://github.com/Lightning-AI/lightning/pull/13645)) +- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) + + - Fixed error handling in learning rate finder when not enough data points are available to give a good suggestion ([#13845](https://github.com/Lightning-AI/lightning/pull/13845)) @@ -415,7 +418,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed -- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) - Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885)) - Fixed mismatching default values for the types of some arguments in the DeepSpeed and Fully-Sharded strategies which made the CLI unable to use them ([#12989](https://github.com/PyTorchLightning/pytorch-lightning/pull/12989)) - Avoid redundant callback restore warning while tuning ([#13026](https://github.com/PyTorchLightning/pytorch-lightning/pull/13026)) diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py index f412d37730044..3f8505cc8de82 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py @@ -1010,7 +1010,7 @@ def test_trainer_config_ipus(monkeypatch, trainer_kwargs, expected_ipus): @mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) -def test_v2_0_0_deprecated_lightning_ipu_module(_, monkeypatch): +def test_v1_8_0_deprecated_lightning_ipu_module(_, monkeypatch): monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v1.8."): _ = LightningIPUModule(BoringModel(), 32) From a5b1883e9f9639af20ddf6ac0bd3d0c169e855f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Thu, 28 Jul 2022 19:30:16 +0200 Subject: [PATCH 14/18] Refactor --- src/pytorch_lightning/strategies/ipu.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 5abd829e84e0e..ce5e97e6bbbc3 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -273,11 +273,7 @@ def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dat # This override is necessary because the cast must occur before the data # is moved to the device to prevent wasteful host->device copies. if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): - - def to_half(data: Tensor) -> Tensor: - return data.half() - - batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=to_half) + batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=Tensor.half) # We don't call `super().batch_to_device` because `data.to(device)` is not # currently necessary for IPUs. The movement of data from host<->IPU is # currently handled by PopTorch. From 9f303a4b448ab7fb16aebb31a305268cc7a2b862 Mon Sep 17 00:00:00 2001 From: Harry Mellor Date: Thu, 28 Jul 2022 19:42:09 +0100 Subject: [PATCH 15/18] Update test --- tests/tests_pytorch/accelerators/test_ipu.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index 0c54e3a6707f3..9d510d9d60e9e 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -40,19 +40,16 @@ class IPUModel(BoringModel): def training_step(self, batch, batch_idx): - assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss def validation_step(self, batch, batch_idx): - assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss def test_step(self, batch, batch_idx): - assert self.precision == torch.finfo(batch.dtype).bits output = self(batch) loss = self.loss(batch, output) return loss @@ -222,6 +219,7 @@ def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None: assert isinstance(trainer.strategy, IPUStrategy) assert isinstance(trainer.strategy.precision_plugin, IPUPrecisionPlugin) assert trainer.strategy.precision_plugin.precision == 16 + assert trainer.strategy.batch_to_device(torch.zeros((1), dtype=torch.float)).dtype == torch.half with pytest.raises(SystemExit): trainer.fit(model) From a9f12e94705c3ef18ac94535a76873bf6fe64326 Mon Sep 17 00:00:00 2001 From: HMellor Date: Thu, 28 Jul 2022 19:43:33 +0100 Subject: [PATCH 16/18] Update src/pytorch_lightning/strategies/ipu.py Co-authored-by: Rohit Gupta --- src/pytorch_lightning/strategies/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index ce5e97e6bbbc3..8d9cc50b90350 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -46,7 +46,7 @@ class LightningIPUModule(_LightningModuleWrapperBase): def __init__( self, pl_module: Union["pl.LightningModule", _LightningPrecisionModuleWrapperBase], precision: Union[str, int] ) -> None: - rank_zero_deprecation("`LightningIPUModule` is deprecated in v1.7 and will be removed in v1.8.0") + rank_zero_deprecation("`LightningIPUModule` has been deprecated in v1.7.0 and will be removed in v1.8.0") super().__init__(pl_module) self.precision = precision From af485fa73045dee3e7d28007aacffd55ce4b2b98 Mon Sep 17 00:00:00 2001 From: HMellor Date: Thu, 28 Jul 2022 19:43:42 +0100 Subject: [PATCH 17/18] Update tests/tests_pytorch/deprecated_api/test_remove_1-8.py Co-authored-by: Rohit Gupta --- tests/tests_pytorch/deprecated_api/test_remove_1-8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py index 59ec7331fefe5..0d9fc1e7a2baf 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-8.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-8.py @@ -1010,7 +1010,7 @@ def test_trainer_config_ipus(monkeypatch, trainer_kwargs, expected_ipus): @mock.patch("pytorch_lightning.accelerators.ipu.IPUAccelerator.is_available", return_value=True) def test_v1_8_0_deprecated_lightning_ipu_module(_, monkeypatch): monkeypatch.setattr(pytorch_lightning.strategies.ipu, "_IPU_AVAILABLE", True) - with pytest.deprecated_call(match=r"is deprecated in v1.7 and will be removed in v1.8."): + with pytest.deprecated_call(match=r"has been deprecated in v1.7.0 and will be removed in v1.8."): _ = LightningIPUModule(BoringModel(), 32) From 87d14266f5010a1a5bf18153d7a9e103410f932a Mon Sep 17 00:00:00 2001 From: HMellor Date: Thu, 28 Jul 2022 19:45:22 +0100 Subject: [PATCH 18/18] Update src/pytorch_lightning/strategies/ipu.py Co-authored-by: Rohit Gupta --- src/pytorch_lightning/strategies/ipu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/strategies/ipu.py b/src/pytorch_lightning/strategies/ipu.py index 8d9cc50b90350..82ba4ad227f7c 100644 --- a/src/pytorch_lightning/strategies/ipu.py +++ b/src/pytorch_lightning/strategies/ipu.py @@ -273,7 +273,7 @@ def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dat # This override is necessary because the cast must occur before the data # is moved to the device to prevent wasteful host->device copies. if self.precision_plugin.precision in (PrecisionType.MIXED, PrecisionType.HALF): - batch = apply_to_collection(batch, (FloatTensor, torch.cuda.FloatTensor), function=Tensor.half) + batch = apply_to_collection(batch, Tensor, function=Tensor.half) # We don't call `super().batch_to_device` because `data.to(device)` is not # currently necessary for IPUs. The movement of data from host<->IPU is # currently handled by PopTorch.