From aa84b4cfcc8bd0f7b6ca816748bcfd27a78ff86b Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Wed, 23 Mar 2022 23:14:36 -0700 Subject: [PATCH 1/7] Deprecate Trainer.tpu_cores --- pytorch_lightning/callbacks/xla_stats_monitor.py | 4 ++-- pytorch_lightning/trainer/trainer.py | 10 ++++++---- tests/accelerators/test_tpu.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/callbacks/xla_stats_monitor.py b/pytorch_lightning/callbacks/xla_stats_monitor.py index ebc6ca9d72357..a2552980b235d 100644 --- a/pytorch_lightning/callbacks/xla_stats_monitor.py +++ b/pytorch_lightning/callbacks/xla_stats_monitor.py @@ -75,8 +75,8 @@ def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") if isinstance(trainer.accelerator, TPUAccelerator): raise MisconfigurationException( - "You are using XLAStatsMonitor but are not running on TPU" - f" since `tpu_cores` attribute in Trainer is set to {trainer.tpu_cores}." + "You are using XLAStatsMonitor but are not running on TPU." + f" The Trainer accelerator type is set to {trainer.accelerator.name().upper()}." 
) device = trainer.strategy.root_device diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index bcda0764ff11f..0f4cefa079ab1 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1811,9 +1811,7 @@ def _log_device_info(self) -> None: f"GPU available: {torch.cuda.is_available()}, used: {isinstance(self.accelerator, GPUAccelerator)}" ) - num_tpu_cores = ( - self.tpu_cores if self.tpu_cores is not None and isinstance(self.accelerator, TPUAccelerator) else 0 - ) + num_tpu_cores = self.num_devices if isinstance(self.accelerator, TPUAccelerator) else 0 rank_zero_info(f"TPU available: {_TPU_AVAILABLE}, using: {num_tpu_cores} TPU cores") num_ipus = self.num_devices if isinstance(self.accelerator, IPUAccelerator) else 0 @@ -2100,7 +2098,11 @@ def root_gpu(self) -> Optional[int]: @property def tpu_cores(self) -> int: - return self._accelerator_connector.tpu_cores + rank_zero_deprecation( + "`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " + "Please use `Trainer.devices` instead." 
+ ) + return self.num_devices if isinstance(self.accelerator, TPUAccelerator) else 0 @property def ipus(self) -> int: diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 7e522ffd170cd..5c0325e03b559 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -103,7 +103,6 @@ def test_accelerator_tpu(accelerator, devices): assert isinstance(trainer.accelerator, TPUAccelerator) assert isinstance(trainer.strategy, TPUSpawnStrategy) assert trainer.num_devices == 8 - assert trainer.tpu_cores == 8 @RunIf(tpu=True) @@ -114,7 +113,8 @@ def test_accelerator_tpu_with_tpu_cores_priority(): with pytest.warns(UserWarning, match="The flag `devices=1` will be ignored,"): trainer = Trainer(accelerator="tpu", devices=1, tpu_cores=tpu_cores) - assert trainer.tpu_cores == tpu_cores + assert isinstance(trainer.accelerator, TPUAccelerator) + assert trainer.num_devices == tpu_cores @RunIf(tpu=True) From 23a2f5eacd85b04a426c0a8df8a45854b47c1acc Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Wed, 23 Mar 2022 23:25:57 -0700 Subject: [PATCH 2/7] update changelog and deprecation test --- CHANGELOG.md | 6 ++++++ pytorch_lightning/callbacks/xla_stats_monitor.py | 2 +- tests/accelerators/test_tpu.py | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4092e04d5765b..fea95cfd95fcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -600,6 +600,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Deprecated passing only the callback state to `Callback.on_load_checkpoint(callback_state)` in favor of passing the callback state to `Callback.load_state_dict` and in 1.8, passing the entire checkpoint dictionary to `Callback.on_load_checkpoint(checkpoint)` ([#11887](https://github.com/PyTorchLightning/pytorch-lightning/pull/11887)) +- Deprecated `Trainer.tpu_cores` in favor of `Trainer.num_devices` ([#12437](https://github.com/PyTorchLightning/pytorch-lightning/pull/12437)) + + ### Removed - Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507)) @@ -811,6 +814,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed `AcceleratorConnector.parallel_devices` property ([#12075](https://github.com/PyTorchLightning/pytorch-lightning/pull/12075)) +- Removed `AcceleratorConnector.tpu_cores` property ([#12437](https://github.com/PyTorchLightning/pytorch-lightning/pull/12437)) + + ### Fixed - Fixed an issue where `ModelCheckpoint` could delete older checkpoints when `dirpath` has changed during resumed training ([#12045](https://github.com/PyTorchLightning/pytorch-lightning/pull/12045)) diff --git a/pytorch_lightning/callbacks/xla_stats_monitor.py b/pytorch_lightning/callbacks/xla_stats_monitor.py index a2552980b235d..33d0c7938c44e 100644 --- a/pytorch_lightning/callbacks/xla_stats_monitor.py +++ b/pytorch_lightning/callbacks/xla_stats_monitor.py @@ -76,7 +76,7 @@ def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") if isinstance(trainer.accelerator, TPUAccelerator): raise MisconfigurationException( "You are using XLAStatsMonitor but are not running on TPU." - f" The Trainer accelerator type is set to {trainer.accelerator.name().upper()}." + f" The accelerator type is set to {trainer.accelerator.name().upper()}." 
) device = trainer.strategy.root_device diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index 5c0325e03b559..bf5c4e4a4bdb1 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -103,6 +103,12 @@ def test_accelerator_tpu(accelerator, devices): assert isinstance(trainer.accelerator, TPUAccelerator) assert isinstance(trainer.strategy, TPUSpawnStrategy) assert trainer.num_devices == 8 + with pytest.deprecated_call( + match= "`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " + "Please use `Trainer.devices` instead." + ): + trainer.tpu_cores == 8 + @RunIf(tpu=True) From 951627ca2a21f983c8df9fba8fee8474ad9176ed Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Fri, 25 Mar 2022 16:44:21 -0700 Subject: [PATCH 3/7] avoid calling xm.xla_device() on multi-tpu --- pytorch_lightning/trainer/trainer.py | 6 +++++- tests/accelerators/test_tpu.py | 5 +---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 0f4cefa079ab1..30daadfa56909 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -2066,7 +2066,11 @@ def num_nodes(self) -> int: @property def device_ids(self) -> List[int]: """List of device indexes per node.""" - devices = getattr(self.strategy, "parallel_devices", [self.strategy.root_device]) + devices = ( + self.strategy.parallel_devices + if isinstance(self.strategy, ParallelStrategy) + else [self.strategy.root_device] + ) device_ids = [] for idx, device in enumerate(devices): if isinstance(device, torch.device): diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index bf5c4e4a4bdb1..ef1c5b9563aec 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -94,7 +94,6 @@ def test_accelerator_cpu_with_tpu_cores_flag(): @RunIf(tpu=True) -@pl_multi_process_test @pytest.mark.parametrize(["accelerator", "devices"], 
[("auto", 8), ("auto", "auto"), ("tpu", None)]) def test_accelerator_tpu(accelerator, devices): assert TPUAccelerator.is_available() @@ -104,13 +103,12 @@ def test_accelerator_tpu(accelerator, devices): assert isinstance(trainer.strategy, TPUSpawnStrategy) assert trainer.num_devices == 8 with pytest.deprecated_call( - match= "`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " + match="`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " "Please use `Trainer.devices` instead." ): trainer.tpu_cores == 8 - @RunIf(tpu=True) def test_accelerator_tpu_with_tpu_cores_priority(): """Test for checking `tpu_cores` flag takes priority over `devices`.""" @@ -124,7 +122,6 @@ def test_accelerator_tpu_with_tpu_cores_priority(): @RunIf(tpu=True) -@pl_multi_process_test def test_set_devices_if_none_tpu(): trainer = Trainer(accelerator="tpu", tpu_cores=8) assert trainer.num_devices == 8 From eb99c73f196949d0aad7b6133fa51748cf9dcbb7 Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Fri, 25 Mar 2022 17:00:07 -0700 Subject: [PATCH 4/7] test deprecated call --- tests/accelerators/test_tpu.py | 6 +----- tests/deprecated_api/test_remove_1-8.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/accelerators/test_tpu.py b/tests/accelerators/test_tpu.py index ef1c5b9563aec..c1cb1e2f369a9 100644 --- a/tests/accelerators/test_tpu.py +++ b/tests/accelerators/test_tpu.py @@ -102,11 +102,6 @@ def test_accelerator_tpu(accelerator, devices): assert isinstance(trainer.accelerator, TPUAccelerator) assert isinstance(trainer.strategy, TPUSpawnStrategy) assert trainer.num_devices == 8 - with pytest.deprecated_call( - match="`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " - "Please use `Trainer.devices` instead." 
- ): - trainer.tpu_cores == 8 @RunIf(tpu=True) @@ -124,6 +119,7 @@ def test_accelerator_tpu_with_tpu_cores_priority(): @RunIf(tpu=True) def test_set_devices_if_none_tpu(): trainer = Trainer(accelerator="tpu", tpu_cores=8) + assert isinstance(trainer.accelerator, TPUAccelerator) assert trainer.num_devices == 8 diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py index 2157dd2ad38fa..01da9aaa0fdd5 100644 --- a/tests/deprecated_api/test_remove_1-8.py +++ b/tests/deprecated_api/test_remove_1-8.py @@ -1106,3 +1106,14 @@ def on_save_checkpoint(self, trainer, pl_module, checkpoint): trainer.callbacks = [TestCallbackSaveHookOverride()] trainer.save_checkpoint(tmpdir + "/pathok.ckpt") + + +def test_trainer_tpu_cores(monkeypatch): + monkeypatch.setattr(pytorch_lightning.accelerators.tpu.TPUAccelerator, "is_available", lambda: True) + monkeypatch.setattr(pytorch_lightning.accelerators.tpu.TPUAccelerator, "parse_devices", lambda: 8) + trainer = Trainer(accelerator="TPU", devices=8) + with pytest.deprecated_call( + match="`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " + "Please use `Trainer.devices` instead." 
+ ): + trainer.tpu_cores == 8 From 8b744521a72af2a64010e3add530c368e2bd90ec Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Fri, 25 Mar 2022 22:14:15 -0700 Subject: [PATCH 5/7] update xla_stats_monitor configuration error --- pytorch_lightning/callbacks/xla_stats_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/callbacks/xla_stats_monitor.py b/pytorch_lightning/callbacks/xla_stats_monitor.py index 33d0c7938c44e..c7fe59a59d515 100644 --- a/pytorch_lightning/callbacks/xla_stats_monitor.py +++ b/pytorch_lightning/callbacks/xla_stats_monitor.py @@ -73,10 +73,10 @@ def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") if not trainer.loggers: raise MisconfigurationException("Cannot use XLAStatsMonitor callback with Trainer that has no logger.") - if isinstance(trainer.accelerator, TPUAccelerator): + if not isinstance(trainer.accelerator, TPUAccelerator): raise MisconfigurationException( "You are using XLAStatsMonitor but are not running on TPU." - f" The accelerator type is set to {trainer.accelerator.name().upper()}." + f" The accelerator is set to {trainer.accelerator.__class__.__name__}." 
) device = trainer.strategy.root_device From 8536a8a53cd238a267e1d48114065e239160e0ab Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Sun, 27 Mar 2022 13:24:57 -0700 Subject: [PATCH 6/7] add xla_stats_monitor misconfig test and fix deprecation message --- pytorch_lightning/trainer/trainer.py | 2 +- tests/deprecated_api/test_remove_1-7.py | 20 ++++++++++++++++++-- tests/deprecated_api/test_remove_1-8.py | 2 +- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index eaddf0dfd6d64..0cb5f59f14dc4 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -2104,7 +2104,7 @@ def root_gpu(self) -> Optional[int]: def tpu_cores(self) -> int: rank_zero_deprecation( "`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " - "Please use `Trainer.devices` instead." + "Please use `Trainer.num_devices` instead." ) return self.num_devices if isinstance(self.accelerator, TPUAccelerator) else 0 diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 8cfaa843a9f10..51d273499fa1f 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -20,6 +20,7 @@ import pytest import torch +import pytorch_lightning from pytorch_lightning import Callback, LightningDataModule, Trainer from pytorch_lightning.callbacks.gpu_stats_monitor import GPUStatsMonitor from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor @@ -35,6 +36,7 @@ TorchElasticEnvironment, ) from pytorch_lightning.strategies import SingleDeviceStrategy +from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.deprecated_api import _soft_unimport_module from tests.helpers import BoringModel from tests.helpers.datamodules import MNISTDataModule @@ -393,8 +395,8 @@ def test_v1_7_0_deprecate_gpu_stats_monitor(tmpdir): _ = GPUStatsMonitor() -@RunIf(tpu=True) -def 
test_v1_7_0_deprecate_xla_stats_monitor(tmpdir): +def test_v1_7_0_deprecate_xla_stats_monitor(monkeypatch): + monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", lambda: True) with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): _ = XLAStatsMonitor() @@ -516,3 +518,17 @@ def post_dispatch(self, trainer): with pytest.deprecated_call(match=escape("`CustomPlugin.post_dispatch()` has been deprecated in v1.6")): CustomPlugin(torch.device("cpu")) + + +def test_xla_stats_monitor_tpu_not_used(monkeypatch): + monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", lambda: True) + with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): + xla_stats = XLAStatsMonitor() + + trainer = Trainer(accelerator="cpu", callbacks=[xla_stats]) + model = BoringModel() + with pytest.raises( + MisconfigurationException, + match="You are using XLAStatsMonitor but are not running on TPU. The accelerator is set to CPUAccelerator.", + ): + trainer.fit(model) diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py index 4775d6afd9d31..ae2e0104ab3b2 100644 --- a/tests/deprecated_api/test_remove_1-8.py +++ b/tests/deprecated_api/test_remove_1-8.py @@ -1134,6 +1134,6 @@ def test_trainer_tpu_cores(monkeypatch): trainer = Trainer(accelerator="TPU", devices=8) with pytest.deprecated_call( match="`Trainer.tpu_cores` is deprecated in v1.6 and will be removed in v1.8. " - "Please use `Trainer.devices` instead." + "Please use `Trainer.num_devices` instead." 
): trainer.tpu_cores == 8 From f5b6250b433ebc5db2d2f54fae1551db1ecd8819 Mon Sep 17 00:00:00 2001 From: DuYicong515 Date: Sun, 27 Mar 2022 15:33:46 -0700 Subject: [PATCH 7/7] fix _TPU_AVAILABLE mock --- tests/deprecated_api/test_remove_1-7.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 51d273499fa1f..548e45683c13f 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -396,7 +396,7 @@ def test_v1_7_0_deprecate_gpu_stats_monitor(tmpdir): def test_v1_7_0_deprecate_xla_stats_monitor(monkeypatch): - monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", lambda: True) + monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", True) with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): _ = XLAStatsMonitor() @@ -521,7 +521,7 @@ def post_dispatch(self, trainer): def test_xla_stats_monitor_tpu_not_used(monkeypatch): - monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", lambda: True) + monkeypatch.setattr(pytorch_lightning.callbacks.xla_stats_monitor, "_TPU_AVAILABLE", True) with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): xla_stats = XLAStatsMonitor()