From 762dd7aee35539ce8405b5249f261f4968b383bc Mon Sep 17 00:00:00 2001 From: shenoynikhil Date: Sat, 14 May 2022 23:56:21 +0530 Subject: [PATCH 1/5] updated docs corresponding to flush_logs_every_n_steps in trainer --- docs/source/common/trainer.rst | 24 ---------------------- docs/source/visualize/logging_advanced.rst | 20 ------------------ 2 files changed, 44 deletions(-) diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst index 22458e320a825..a4be86d6a620c 100644 --- a/docs/source/common/trainer.rst +++ b/docs/source/common/trainer.rst @@ -695,30 +695,6 @@ impact to subsequent runs. These are the changes enabled: - Disables the Tuner. - If using the CLI, the configuration file is not saved. -flush_logs_every_n_steps -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. warning:: ``flush_logs_every_n_steps`` has been deprecated in v1.5 and will be removed in v1.7. - Please configure flushing directly in the logger instead. - -.. raw:: html - - - -| - -Writes logs to disk this often. - -.. testcode:: - - # default used by the Trainer - trainer = Trainer(flush_logs_every_n_steps=100) - -See Also: - - :doc:`logging <../extensions/logging>` - .. _gpus: gpus diff --git a/docs/source/visualize/logging_advanced.rst b/docs/source/visualize/logging_advanced.rst index ca11e3957fdb2..69bd58838f033 100644 --- a/docs/source/visualize/logging_advanced.rst +++ b/docs/source/visualize/logging_advanced.rst @@ -46,26 +46,6 @@ To change this behaviour, set the *log_every_n_steps* :class:`~pytorch_lightning ---- -Modify flushing frequency -========================= - -Metrics are kept in memory for N steps to improve training efficiency. Every N steps, metrics flush to disk. To change the frequency of this flushing, use the *flush_logs_every_n_steps* Trainer argument. - -.. code-block:: python - - # faster training, high memory - Trainer(flush_logs_every_n_steps=500) - - # slower training, low memory - Trainer(flush_logs_every_n_steps=500) - -The higher *flush_logs_every_n_steps* is, the faster the model will train but the memory will build up until the next flush. -The smaller *flush_logs_every_n_steps* is, the slower the model will train but memory will be kept to a minimum. 
- -TODO: chart - ----- - ****************** Customize self.log ****************** From ec911ccc6e2f08514b87217a66cba89a2d2ef882 Mon Sep 17 00:00:00 2001 From: shenoynikhil Date: Sun, 15 May 2022 00:11:48 +0530 Subject: [PATCH 2/5] updated training_epoch_loop, logger_connector, trainer and test --- pytorch_lightning/loops/epoch/training_epoch_loop.py | 3 +-- .../connectors/logger_connector/logger_connector.py | 9 --------- pytorch_lightning/trainer/trainer.py | 9 +-------- tests/deprecated_api/test_remove_1-7.py | 5 ----- 4 files changed, 2 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/loops/epoch/training_epoch_loop.py b/pytorch_lightning/loops/epoch/training_epoch_loop.py index 599bf45deec9b..9c554514a781d 100644 --- a/pytorch_lightning/loops/epoch/training_epoch_loop.py +++ b/pytorch_lightning/loops/epoch/training_epoch_loop.py @@ -529,8 +529,7 @@ def _should_check_val_fx(self, batch_idx: int, is_last_batch: bool) -> bool: def _save_loggers_on_train_batch_end(self) -> None: """Flushes loggers to disk.""" # this assumes that `batches_that_stepped` was increased before - should_flush = self._batches_that_stepped % self.trainer.flush_logs_every_n_steps == 0 - if should_flush or self.trainer.should_stop: + if self.trainer.should_stop: for logger in self.trainer.loggers: logger.save() diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index e22eb579e610b..db34f0a77e71d 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -43,19 +43,10 @@ def __init__(self, trainer: "pl.Trainer") -> None: def on_trainer_init( self, logger: Union[bool, Logger, Iterable[Logger]], - flush_logs_every_n_steps: Optional[int], log_every_n_steps: int, move_metrics_to_cpu: bool, ) -> None: self.configure_logger(logger) - if flush_logs_every_n_steps is not None: - rank_zero_deprecation( - f"Setting `Trainer(flush_logs_every_n_steps={flush_logs_every_n_steps})` is deprecated in v1.5 " - "and will be removed in v1.7. Please configure flushing in the logger instead." - ) - else: - flush_logs_every_n_steps = 100 # original default parameter - self.trainer.flush_logs_every_n_steps = flush_logs_every_n_steps self.trainer.log_every_n_steps = log_every_n_steps self.trainer.move_metrics_to_cpu = move_metrics_to_cpu for logger in self.trainer.loggers: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index f2d5398173593..8f48816a74f3b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -161,7 +161,6 @@ def __init__( limit_test_batches: Optional[Union[int, float]] = None, limit_predict_batches: Optional[Union[int, float]] = None, val_check_interval: Optional[Union[int, float]] = None, - flush_logs_every_n_steps: Optional[int] = None, log_every_n_steps: int = 50, accelerator: Optional[Union[str, Accelerator]] = None, strategy: Optional[Union[str, Strategy]] = None, @@ -262,12 +261,6 @@ def __init__( of train, val and test to find any bugs (ie: a sort of unit test). Default: ``False``. - flush_logs_every_n_steps: How often to flush logs to disk (defaults to every 100 steps). - - .. deprecated:: v1.5 - ``flush_logs_every_n_steps`` has been deprecated in v1.5 and will be removed in v1.7. - Please configure flushing directly in the logger instead. 
- gpus: Number of GPUs to train on (int) or which GPUs to train on (list or str) applied per node Default: ``None``. @@ -565,7 +558,7 @@ def __init__( # init logger flags self._loggers: List[Logger] - self._logger_connector.on_trainer_init(logger, flush_logs_every_n_steps, log_every_n_steps, move_metrics_to_cpu) + self._logger_connector.on_trainer_init(logger, log_every_n_steps, move_metrics_to_cpu) # init debugging flags self.val_check_interval: Union[int, float] diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 6b90af9897eaf..7d1cc72a3e269 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -66,11 +66,6 @@ def test_v1_7_0_process_position_trainer_constructor(tmpdir): _ = Trainer(process_position=5) -def test_v1_7_0_flush_logs_every_n_steps_trainer_constructor(tmpdir): - with pytest.deprecated_call(match=r"Setting `Trainer\(flush_logs_every_n_steps=10\)` is deprecated in v1.5"): - _ = Trainer(flush_logs_every_n_steps=10) - - class BoringCallbackDDPSpawnModel(BoringModel): def add_to_queue(self, queue): ... From 6ede9403b9af28fd31381305c161c9dc477eceda Mon Sep 17 00:00:00 2001 From: shenoynikhil Date: Sun, 15 May 2022 01:05:02 +0530 Subject: [PATCH 3/5] updated changelog.md --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cd7495990bc4..a70a8752c4d67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -112,6 +112,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed the deprecated `flush_logs_every_n_steps` argument from the `Trainer` constructor ([#13074](https://github.com/PyTorchLightning/pytorch-lightning/pull/13074)) + + - Removed the deprecated `checkpoint_callback` argument from the `Trainer` constructor ([#13027](https://github.com/PyTorchLightning/pytorch-lightning/pull/13027)) From a73c93393f492ece05ffc31dd5afc620c3b7d71a Mon Sep 17 00:00:00 2001 From: shenoynikhil Date: Sun, 22 May 2022 23:23:45 +0530 Subject: [PATCH 4/5] Updated based on @awaelchli comment --- docs/source/visualize/logging_advanced.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/source/visualize/logging_advanced.rst b/docs/source/visualize/logging_advanced.rst index 69bd58838f033..f67a921d5fed5 100644 --- a/docs/source/visualize/logging_advanced.rst +++ b/docs/source/visualize/logging_advanced.rst @@ -46,6 +46,25 @@ To change this behaviour, set the *log_every_n_steps* :class:`~pytorch_lightning ---- +Modify flushing frequency +========================= + +Some loggers keep logged metrics in memory for N steps and only periodically flush them to disk to improve training efficiency. +Every logger handles this a bit differently. For example, here is how to fine-tune flushing for the TensorBoard logger: + +.. 
code-block:: python
+
+    # Default used by TensorBoard: Write to disk after 10 logging events or every two minutes
+    logger = TensorBoardLogger(..., max_queue=10, flush_secs=120)
+
+    # Faster training, more memory used
+    logger = TensorBoardLogger(..., max_queue=100)
+
+    # Slower training, less memory used
+    logger = TensorBoardLogger(..., max_queue=1)
+
+----
+
 ******************
 Customize self.log
 ******************

From 574776c60769a60bf0211cb423e4e54b6f967b38 Mon Sep 17 00:00:00 2001
From: Nikhil Shenoy
Date: Sun, 22 May 2022 23:27:06 +0530
Subject: [PATCH 5/5] Update pytorch_lightning/loops/epoch/training_epoch_loop.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 pytorch_lightning/loops/epoch/training_epoch_loop.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytorch_lightning/loops/epoch/training_epoch_loop.py b/pytorch_lightning/loops/epoch/training_epoch_loop.py
index 9c554514a781d..04e9d070a6d8e 100644
--- a/pytorch_lightning/loops/epoch/training_epoch_loop.py
+++ b/pytorch_lightning/loops/epoch/training_epoch_loop.py
@@ -528,7 +528,6 @@ def _should_check_val_fx(self, batch_idx: int, is_last_batch: bool) -> bool:

     def _save_loggers_on_train_batch_end(self) -> None:
         """Flushes loggers to disk."""
-        # this assumes that `batches_that_stepped` was increased before
         if self.trainer.should_stop:
             for logger in self.trainer.loggers:
                 logger.save()
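
With ``flush_logs_every_n_steps`` removed from the ``Trainer``, flushing is now configured on each logger. Below is a minimal migration sketch, assuming the logger-side options in this release line: ``CSVLogger`` accepts its own ``flush_logs_every_n_steps`` argument (added in v1.5 alongside this deprecation), and ``TensorBoardLogger`` forwards ``max_queue``/``flush_secs`` to TensorBoard's ``SummaryWriter``, as documented in PATCH 4/5. The ``save_dir`` value is illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger

    # Before: Trainer(flush_logs_every_n_steps=100)
    # After: each logger owns its flushing policy.
    csv_logger = CSVLogger(save_dir="logs/", flush_logs_every_n_steps=100)
    tb_logger = TensorBoardLogger(save_dir="logs/", max_queue=10, flush_secs=120)

    trainer = Trainer(logger=[csv_logger, tb_logger])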
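After PATCH 2/5, ``_save_loggers_on_train_batch_end`` only calls ``logger.save()`` once ``trainer.should_stop`` is set, so the Trainer no longer drives periodic flushing at all. Users who relied on the old step-based cadence can restore it with a small callback. This is a sketch, not part of the patches: ``FlushLogsEveryNSteps`` is a hypothetical name, and the body uses only attributes that appear in this diff (``trainer.loggers``, ``Logger.save()``) plus the standard ``on_train_batch_end`` callback hook.

.. code-block:: python

    from pytorch_lightning import Callback, Trainer


    class FlushLogsEveryNSteps(Callback):
        """Re-creates the removed Trainer-level flush interval."""

        def __init__(self, flush_every_n_steps: int = 100):
            self.flush_every_n_steps = flush_every_n_steps

        def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
            # Flush every N global steps, mirroring the removed Trainer behavior.
            if trainer.global_step % self.flush_every_n_steps == 0:
                for logger in trainer.loggers:
                    logger.save()


    trainer = Trainer(callbacks=[FlushLogsEveryNSteps(flush_every_n_steps=100)])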