From b6410a48fbd2c7c0a20ee836a6e2b0aa81668314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20POIRET?= Date: Fri, 22 Apr 2022 11:33:36 +0200 Subject: [PATCH 1/3] Updated callback * Fix for `max_steps` (defaults to `-1`), * Fix for deprecation of `num_gpus`, `num_processes` and `tpu_cores`. --- pl_bolts/callbacks/sparseml.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pl_bolts/callbacks/sparseml.py b/pl_bolts/callbacks/sparseml.py index 50963c3dd2..d0cd456326 100644 --- a/pl_bolts/callbacks/sparseml.py +++ b/pl_bolts/callbacks/sparseml.py @@ -66,14 +66,17 @@ def _num_training_steps_per_epoch(self, trainer: Trainer) -> int: else: dataset_size = len(trainer.datamodule.train_dataloader()) - num_devices = max(1, trainer.num_gpus, trainer.num_processes) - if trainer.tpu_cores: - num_devices = max(num_devices, trainer.tpu_cores) + if hasattr(trainer, 'num_devices'): + num_devices = max(1, trainer.num_devices) + else: + num_devices = max(1, trainer.num_gpus, trainer.num_processes) + if trainer.tpu_cores: + num_devices = max(num_devices, trainer.tpu_cores) effective_batch_size = trainer.accumulate_grad_batches * num_devices max_estimated_steps = dataset_size // effective_batch_size - if trainer.max_steps and trainer.max_steps < max_estimated_steps: + if trainer.max_steps != -1 and trainer.max_steps < max_estimated_steps: return trainer.max_steps return max_estimated_steps From c2a40ec4219add40f85fed91cac3905d4ec00ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20POIRET?= Date: Fri, 22 Apr 2022 11:43:33 +0200 Subject: [PATCH 2/3] Avoid breaking changes for `max_steps=None` --- pl_bolts/callbacks/sparseml.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pl_bolts/callbacks/sparseml.py b/pl_bolts/callbacks/sparseml.py index d0cd456326..15b58f0059 100644 --- a/pl_bolts/callbacks/sparseml.py +++ b/pl_bolts/callbacks/sparseml.py @@ -76,8 +76,10 @@ def _num_training_steps_per_epoch(self, trainer: Trainer) -> int: effective_batch_size = trainer.accumulate_grad_batches * num_devices max_estimated_steps = dataset_size // effective_batch_size - if trainer.max_steps != -1 and trainer.max_steps < max_estimated_steps: - return trainer.max_steps + # To avoid breaking changes, max_steps is set to -1 if it is not defined + max_steps = -1 if not trainer.max_steps else trainer.max_steps + if max_steps != -1 and max_steps < max_estimated_steps: + return max_steps return max_estimated_steps @staticmethod From 84db0cab4944f5b04e437b67a31b7350a2929f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20POIRET?= Date: Fri, 22 Apr 2022 11:52:32 +0200 Subject: [PATCH 3/3] Added comments for clarity and future maintenance --- pl_bolts/callbacks/sparseml.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pl_bolts/callbacks/sparseml.py b/pl_bolts/callbacks/sparseml.py index 15b58f0059..6963052a30 100644 --- a/pl_bolts/callbacks/sparseml.py +++ b/pl_bolts/callbacks/sparseml.py @@ -67,8 +67,10 @@ def _num_training_steps_per_epoch(self, trainer: Trainer) -> int: dataset_size = len(trainer.datamodule.train_dataloader()) if hasattr(trainer, 'num_devices'): + # New behavior in Lightning num_devices = max(1, trainer.num_devices) else: + # Old behavior deprecated in v1.6 num_devices = max(1, trainer.num_gpus, trainer.num_processes) if trainer.tpu_cores: num_devices = max(num_devices, trainer.tpu_cores)