From 8d58ae988a9aebc4225e12a6b268abadde706bb1 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 2 Mar 2022 17:10:40 +0530 Subject: [PATCH 01/13] fix val step logging with multiple dataloaders --- .../loops/dataloader/evaluation_loop.py | 10 ++++++- .../loops/epoch/evaluation_epoch_loop.py | 15 +++++++++-- pytorch_lightning/loops/fit_loop.py | 3 ++- .../logger_connector/logger_connector.py | 27 ++----------------- 4 files changed, 26 insertions(+), 29 deletions(-) diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index 8f35b39d60fdd..19490f6bc5c41 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ -63,7 +63,7 @@ def __init__(self, verbose: bool = True) -> None: @property def num_dataloaders(self) -> int: """Returns the total number of dataloaders.""" - # case where user does: + # case where user does:_get_max_batches # return dl1, dl2 dataloaders = self.dataloaders if dataloaders is None: @@ -153,6 +153,7 @@ def advance(self, *args: Any, **kwargs: Any) -> None: if self.num_dataloaders > 1: kwargs["dataloader_idx"] = dataloader_idx dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs) + self.epoch_loop._increment_batch_idx_tracker(dataloader_idx, dl_max_batches) # store batch level output per dataloader self._outputs.append(dl_outputs) @@ -221,10 +222,17 @@ def _get_max_batches(self) -> List[int]: def _reload_evaluation_dataloaders(self) -> None: """Reloads dataloaders if necessary.""" + dataloaders = None + if self.trainer.testing: self.trainer.reset_test_dataloader() + dataloaders = self.trainer.test_dataloaders elif self.trainer.val_dataloaders is None or self.trainer._data_connector._should_reload_val_dl: self.trainer.reset_val_dataloader() + dataloaders = self.trainer.val_dataloaders + + if dataloaders: + self.epoch_loop._initialize_batch_idx_tracker(len(dataloaders)) def 
_on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: """Runs ``on_{validation/test}_start`` hooks.""" diff --git a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py index 10388615fcc94..df8fdbcd73b2d 100644 --- a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py +++ b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py @@ -14,7 +14,7 @@ from collections import OrderedDict from functools import lru_cache -from typing import Any, cast, Dict, Optional +from typing import Any, cast, Dict, List, Optional from deprecate import void from torch.utils.data import DataLoader @@ -49,6 +49,7 @@ def __init__(self) -> None: self._dl_max_batches = 0 self._data_fetcher: Optional[AbstractDataFetcher] = None self._dataloader_state_dict: Dict[str, Any] = {} + self._seen_batches: List[int] = [] @property def done(self) -> bool: @@ -135,7 +136,10 @@ def advance( # type: ignore[override] self.batch_progress.increment_completed() # log batch metrics - self.trainer.logger_connector.update_eval_step_metrics() + if not self.trainer.sanity_checking: + dl_idx = kwargs.get("dataloader_idx", 0) + dl_eval_step = self._seen_batches[dl_idx] + self.batch_progress.current.completed - 1 + self.trainer.logger_connector.update_eval_step_metrics(dl_eval_step) # track epoch level outputs if self._should_track_batch_outputs_for_epoch_end() and output is not None: @@ -287,3 +291,10 @@ def _should_track_batch_outputs_for_epoch_end(self) -> bool: if self.trainer.testing: return is_overridden("test_epoch_end", model) return is_overridden("validation_epoch_end", model) + + def _initialize_batch_idx_tracker(self, num_dataloaders): + self._seen_batches = [0] * num_dataloaders + + def _increment_batch_idx_tracker(self, dataloader_idx, max_batches): + if not self.trainer.sanity_checking: + self._seen_batches[dataloader_idx] = max_batches diff --git a/pytorch_lightning/loops/fit_loop.py b/pytorch_lightning/loops/fit_loop.py index 
0c9c68b24f4a0..8b95616bc9337 100644 --- a/pytorch_lightning/loops/fit_loop.py +++ b/pytorch_lightning/loops/fit_loop.py @@ -218,7 +218,8 @@ def reset(self) -> None: def on_run_start(self) -> None: # type: ignore[override] """Calls the ``on_train_start`` hook.""" # reset train dataloader and val dataloader - self.trainer.reset_train_val_dataloaders(self.trainer.lightning_module) + self.trainer.reset_train_dataloader(self.trainer.lightning_module) + self.epoch_loop.val_loop._reload_evaluation_dataloaders() data_fetcher_cls = _select_data_fetcher(self.trainer) self._data_fetcher = data_fetcher_cls(prefetch_batches=self.prefetch_batches) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 428713ff3347e..23a0448427f87 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -20,7 +20,6 @@ from pytorch_lightning.loggers import LightningLoggerBase, TensorBoardLogger from pytorch_lightning.plugins.environments.slurm_environment import SLURMEnvironment from pytorch_lightning.trainer.connectors.logger_connector.result import _METRICS, _OUT_DICT, _PBAR_DICT -from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities import memory from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.metrics import metrics_to_scalars @@ -37,8 +36,6 @@ def __init__(self, trainer: "pl.Trainer", log_gpu_memory: Optional[str] = None) "Please monitor GPU stats with the `DeviceStatsMonitor` callback directly instead." 
) self.log_gpu_memory = log_gpu_memory - self._val_log_step: int = 0 - self._test_log_step: int = 0 self._progress_bar_metrics: _PBAR_DICT = {} self._logged_metrics: _OUT_DICT = {} self._callback_metrics: _OUT_DICT = {} @@ -137,35 +134,15 @@ def log_metrics(self, metrics: _OUT_DICT, step: Optional[int] = None) -> None: Evaluation metric updates """ - @property - def _eval_log_step(self) -> Optional[int]: - if self.trainer.state.stage is RunningStage.VALIDATING: - return self._val_log_step - if self.trainer.state.stage is RunningStage.TESTING: - return self._test_log_step - return None - - def _increment_eval_log_step(self) -> None: - if self.trainer.state.stage is RunningStage.VALIDATING: - self._val_log_step += 1 - elif self.trainer.state.stage is RunningStage.TESTING: - self._test_log_step += 1 - def _evaluation_epoch_end(self) -> None: results = self.trainer._results assert results is not None results.dataloader_idx = None - def update_eval_step_metrics(self) -> None: + def update_eval_step_metrics(self, step) -> None: assert not self._epoch_end_reached - if self.trainer.sanity_checking: - return - # logs user requested information to logger - self.log_metrics(self.metrics["log"], step=self._eval_log_step) - - # increment the step even if nothing was logged - self._increment_eval_log_step() + self.log_metrics(self.metrics["log"], step=step) def update_eval_epoch_metrics(self) -> _OUT_DICT: assert self._epoch_end_reached From 10dabf1718d0759a36b68b28b2e8e952a964ee09 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 2 Mar 2022 21:04:34 +0530 Subject: [PATCH 02/13] add test --- .../logging_/test_eval_loop_logging.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index 5d7e64e373ee9..e8f23a0cd8589 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -25,6 +25,7 @@ from 
pytorch_lightning import callbacks, Trainer from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.loggers.csv_logs import CSVLogger from pytorch_lightning.loops.dataloader import EvaluationLoop from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -924,3 +925,57 @@ def test_rich_print_results(inputs, expected): EvaluationLoop._print_results(*inputs, file=out) expected = expected[1:] # remove the initial line break from the """ string assert out.getvalue() == expected.lstrip() + + +@pytest.mark.parametrize("num_dataloaders", [1, 2]) +def test_eval_step_logging(tmpdir, num_dataloaders): + class CustomLogger(CSVLogger): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.logged_metrics = collections.defaultdict(lambda: collections.defaultdict(list)) + + def log_metrics(self, metrics, step): + for key in metrics: + self.logged_metrics[key]["logged_values"].append(metrics[key]) + self.logged_metrics[key]["logged_steps"].append(step) + + class CustomBoringModel(BoringModel): + def validation_step(self, batch, batch_idx, dataloader_idx=None): + self.log(f"val_log_{self.trainer.state.fn}", batch_idx, on_step=True, on_epoch=False) + + def test_step(self, batch, batch_idx, dataloader_idx=None): + self.log("test_log", batch_idx, on_step=True, on_epoch=False) + + def val_dataloader(self): + return [super().val_dataloader()] * num_dataloaders + + def test_dataloader(self): + return [super().test_dataloader()] * num_dataloaders + + logger = CustomLogger(tmpdir) + limit_batches = 4 + max_epochs = 2 + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=max_epochs, + limit_train_batches=1, + limit_val_batches=limit_batches, + limit_test_batches=limit_batches, + logger=logger, + ) + model = CustomBoringModel() + model.validation_epoch_end = None + model.test_epoch_end = None + + trainer.fit(model) + trainer.validate(model) + 
trainer.test(model) + + for dl_idx in range(num_dataloaders): + suffix = f"/dataloader_idx_{dl_idx}" if num_dataloaders == 2 else "" + assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_values"] == list(range(limit_batches)) * 2 + assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_steps"] == list(range(limit_batches * 2)) + assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) + assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) + assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) + assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) From 6b99ee7f20d3e8b2c5de5a3a75b2c46c2dd31be7 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 2 Mar 2022 21:12:31 +0530 Subject: [PATCH 03/13] chlog --- CHANGELOG.md | 3 +++ pytorch_lightning/loops/dataloader/evaluation_loop.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bba4b8ff892bd..ceb12355def86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -723,6 +723,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed to avoid common hook warning if no hook is overridden ([#12131](https://github.com/PyTorchLightning/pytorch-lightning/pull/12131)) +- Fixed logging on step level for eval mode ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184)) + + ## [1.5.10] - 2022-02-08 ### Fixed diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index 19490f6bc5c41..5b8c7f9b893fc 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ -63,7 +63,7 @@ def __init__(self, verbose: bool = True) -> None: @property def num_dataloaders(self) -> int: """Returns the total number of dataloaders.""" - # case where user does:_get_max_batches + # case where user does: # return dl1, dl2 dataloaders = self.dataloaders if dataloaders is None: From 6bb76be3bde557dabc096b5ffac181e779c3056d Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 2 Mar 2022 21:32:37 +0530 Subject: [PATCH 04/13] fix for inf dataloaders --- pytorch_lightning/loops/dataloader/evaluation_loop.py | 2 +- pytorch_lightning/loops/epoch/evaluation_epoch_loop.py | 6 +++--- .../trainer/connectors/logger_connector/logger_connector.py | 2 +- tests/trainer/logging_/test_eval_loop_logging.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index 5b8c7f9b893fc..fc736edea6fd3 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ -153,7 +153,7 @@ def advance(self, *args: Any, **kwargs: Any) -> None: if self.num_dataloaders > 1: kwargs["dataloader_idx"] = dataloader_idx dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs) - self.epoch_loop._increment_batch_idx_tracker(dataloader_idx, dl_max_batches) + 
self.epoch_loop._increment_batch_idx_tracker(dataloader_idx) # store batch level output per dataloader self._outputs.append(dl_outputs) diff --git a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py index df8fdbcd73b2d..67e7031cd646d 100644 --- a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py +++ b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py @@ -292,9 +292,9 @@ def _should_track_batch_outputs_for_epoch_end(self) -> bool: return is_overridden("test_epoch_end", model) return is_overridden("validation_epoch_end", model) - def _initialize_batch_idx_tracker(self, num_dataloaders): + def _initialize_batch_idx_tracker(self, num_dataloaders: int) -> None: self._seen_batches = [0] * num_dataloaders - def _increment_batch_idx_tracker(self, dataloader_idx, max_batches): + def _increment_batch_idx_tracker(self, dataloader_idx: int) -> None: if not self.trainer.sanity_checking: - self._seen_batches[dataloader_idx] = max_batches + self._seen_batches[dataloader_idx] += self.batch_progress.current.completed diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 23a0448427f87..aa6e560293f42 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -139,7 +139,7 @@ def _evaluation_epoch_end(self) -> None: assert results is not None results.dataloader_idx = None - def update_eval_step_metrics(self, step) -> None: + def update_eval_step_metrics(self, step: int) -> None: assert not self._epoch_end_reached # logs user requested information to logger self.log_metrics(self.metrics["log"], step=step) diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index e8f23a0cd8589..d4d1170cc089c 100644 --- 
a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -954,7 +954,7 @@ def test_dataloader(self): logger = CustomLogger(tmpdir) limit_batches = 4 - max_epochs = 2 + max_epochs = 3 trainer = Trainer( default_root_dir=tmpdir, max_epochs=max_epochs, @@ -973,8 +973,8 @@ def test_dataloader(self): for dl_idx in range(num_dataloaders): suffix = f"/dataloader_idx_{dl_idx}" if num_dataloaders == 2 else "" - assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_values"] == list(range(limit_batches)) * 2 - assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_steps"] == list(range(limit_batches * 2)) + assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_values"] == list(range(limit_batches)) * max_epochs + assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_steps"] == list(range(limit_batches * max_epochs)) assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) From 73b4f2806e8a9261259a7e522bc6d89aa4cfd3c8 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Wed, 2 Mar 2022 21:36:20 +0530 Subject: [PATCH 05/13] update test --- tests/trainer/test_dataloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 08d54e05bfffe..aef318f173465 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -1238,7 +1238,7 @@ def test_dataloaders_load_only_once_passed_loaders(tmpdir): assert tracker.mock_calls == [ call.reset_val_dataloader(), - call.reset_train_dataloader(model=model), + call.reset_train_dataloader(model), call.reset_test_dataloader(), ] From f908104da85b55b1cd52b50c4a3e37b78feaa423 Mon Sep 17 00:00:00 2001 From: rohitgr7 
Date: Thu, 24 Mar 2022 17:47:31 +0530 Subject: [PATCH 06/13] update test --- tests/trainer/logging_/test_eval_loop_logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index d4d1170cc089c..90f151683242f 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -976,6 +976,6 @@ def test_dataloader(self): assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_values"] == list(range(limit_batches)) * max_epochs assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_steps"] == list(range(limit_batches * max_epochs)) assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) - assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) - assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) + assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_steps"] == list(range(limit_batches)) + assert logger.logged_metrics[f"test_log{suffix}"]["logged_values"] == list(range(limit_batches)) assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) From 9798804160218a069c9ad6695d6852f914ec8e97 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Mon, 28 Mar 2022 20:20:43 +0530 Subject: [PATCH 07/13] Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/loops/dataloader/evaluation_loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index 7ab23c34d0d56..eb1c74fc08064 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ 
-229,7 +229,7 @@ def _reload_evaluation_dataloaders(self) -> None: self.trainer.reset_val_dataloader() dataloaders = self.trainer.val_dataloaders - if dataloaders: + if dataloaders is not None: self.epoch_loop._initialize_batch_idx_tracker(len(dataloaders)) def _on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: From dcd8ab87aada495f412c1eba895d07bd047e9dc9 Mon Sep 17 00:00:00 2001 From: rohitgr7 Date: Tue, 29 Mar 2022 16:15:07 +0530 Subject: [PATCH 08/13] mock logger --- .../logging_/test_eval_loop_logging.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index 8474ba380097a..4007a27b0d120 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -25,7 +25,6 @@ from pytorch_lightning import callbacks, Trainer from pytorch_lightning.loggers import TensorBoardLogger -from pytorch_lightning.loggers.csv_logs import CSVLogger from pytorch_lightning.loops.dataloader import EvaluationLoop from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -940,17 +939,10 @@ def test_rich_print_results(inputs, expected): assert out.getvalue() == expected.lstrip() +@mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics") @pytest.mark.parametrize("num_dataloaders", [1, 2]) -def test_eval_step_logging(tmpdir, num_dataloaders): - class CustomLogger(CSVLogger): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.logged_metrics = collections.defaultdict(lambda: collections.defaultdict(list)) - - def log_metrics(self, metrics, step): - for key in metrics: - self.logged_metrics[key]["logged_values"].append(metrics[key]) - self.logged_metrics[key]["logged_steps"].append(step) +def test_eval_step_logging(mock_log_metrics, tmpdir, num_dataloaders): + """Test 
that eval step during fit/validate/test is updated correctly.""" class CustomBoringModel(BoringModel): def validation_step(self, batch, batch_idx, dataloader_idx=None): @@ -965,7 +957,6 @@ def val_dataloader(self): def test_dataloader(self): return [super().test_dataloader()] * num_dataloaders - logger = CustomLogger(tmpdir) limit_batches = 4 max_epochs = 3 trainer = Trainer( @@ -974,7 +965,6 @@ def test_dataloader(self): limit_train_batches=1, limit_val_batches=limit_batches, limit_test_batches=limit_batches, - logger=logger, ) model = CustomBoringModel() model.validation_epoch_end = None @@ -984,11 +974,24 @@ def test_dataloader(self): trainer.validate(model) trainer.test(model) - for dl_idx in range(num_dataloaders): - suffix = f"/dataloader_idx_{dl_idx}" if num_dataloaders == 2 else "" - assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_values"] == list(range(limit_batches)) * max_epochs - assert logger.logged_metrics[f"val_log_fit{suffix}"]["logged_steps"] == list(range(limit_batches * max_epochs)) - assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_values"] == list(range(limit_batches)) - assert logger.logged_metrics[f"val_log_validate{suffix}"]["logged_steps"] == list(range(limit_batches)) - assert logger.logged_metrics[f"test_log{suffix}"]["logged_values"] == list(range(limit_batches)) - assert logger.logged_metrics[f"test_log{suffix}"]["logged_steps"] == list(range(limit_batches)) + def get_suffix(dl_idx): + return f"/dataloader_idx_{dl_idx}" if num_dataloaders == 2 else "" + + eval_steps = range(limit_batches) + fit_calls = [ + call(metrics={f"val_log_fit{get_suffix(dl_idx)}": float(step)}, step=step + (limit_batches * epoch)) + for epoch in range(max_epochs) + for dl_idx in range(num_dataloaders) + for step in eval_steps + ] + validate_calls = [ + call(metrics={f"val_log_validate{get_suffix(dl_idx)}": float(val)}, step=val) + for dl_idx in range(num_dataloaders) + for val in eval_steps + ] + test_calls = [ + 
call(metrics={f"test_log{get_suffix(dl_idx)}": float(val)}, step=val) + for dl_idx in range(num_dataloaders) + for val in eval_steps + ] + assert mock_log_metrics.mock_calls == fit_calls + validate_calls + test_calls From 46604d9c1680c82e905c8bfe606cbd87a44d36d2 Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Tue, 26 Apr 2022 15:58:50 +0530 Subject: [PATCH 09/13] bad merge --- .../trainer/connectors/logger_connector/logger_connector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index a9d3e9d03e620..4aaa071da15da 100644 --- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -19,7 +19,6 @@ from pytorch_lightning.loggers import Logger, TensorBoardLogger from pytorch_lightning.plugins.environments.slurm_environment import SLURMEnvironment from pytorch_lightning.trainer.connectors.logger_connector.result import _METRICS, _OUT_DICT, _PBAR_DICT -from pytorch_lightning.utilities import memory from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device from pytorch_lightning.utilities.metrics import metrics_to_scalars from pytorch_lightning.utilities.model_helpers import is_overridden From 5a09dc704c9039c234b4f2267b349e783dd45575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 25 May 2022 19:41:53 +0200 Subject: [PATCH 10/13] Deprecate unused method --- CHANGELOG.md | 2 +- pytorch_lightning/trainer/trainer.py | 8 ++++++++ tests/deprecated_api/test_remove_1-9.py | 9 ++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c917a29211664..1d61412327e54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -116,7 +116,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Deprecated `pytorch_lightning.core.lightning.LightningModule` in favor of `pytorch_lightning.core.module.LightningModule` ([#12740](https://github.com/PyTorchLightning/pytorch-lightning/pull/12740)) -- - Deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184)) ### Removed diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3b298911ed209..a7592ffaac1af 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1950,7 +1950,15 @@ def reset_train_val_dataloaders(self, model: Optional["pl.LightningModule"] = No Args: model: The ``LightningModule`` if called outside of the trainer scope. + + .. deprecated:: v1.7 + This method is deprecated in v1.7 and will be removed in v1.9. + Please use ``Trainer.reset_{train,val}_dataloader`` instead. """ + rank_zero_deprecation( + "`Trainer.reset_train_val_dataloaders` has been deprecated in v1.7 and will be removed in v1.9."
+ " Use `Trainer.reset_{train,val}_dataloader` instead" + ) if self.train_dataloader is None: self.reset_train_dataloader(model=model) if self.val_dataloaders is None: diff --git a/tests/deprecated_api/test_remove_1-9.py b/tests/deprecated_api/test_remove_1-9.py index 5d7b0b6260e3b..0ad4d7a7410fd 100644 --- a/tests/deprecated_api/test_remove_1-9.py +++ b/tests/deprecated_api/test_remove_1-9.py @@ -17,6 +17,7 @@ import pytest import pytorch_lightning.loggers.base as logger_base +from pytorch_lightning import Trainer from pytorch_lightning.core.module import LightningModule from pytorch_lightning.utilities.cli import LightningCLI from pytorch_lightning.utilities.rank_zero import rank_zero_only @@ -106,6 +107,12 @@ def test_old_callback_path(): from pytorch_lightning.callbacks.base import Callback with pytest.deprecated_call( - match="pytorch_lightning.callbacks.base.Callback has been deprecated in v1.7" " and will be removed in v1.9." + match="pytorch_lightning.callbacks.base.Callback has been deprecated in v1.7 and will be removed in v1.9." 
): Callback() + + +def test_deprecated_dataloader_reset(): + trainer = Trainer() + with pytest.deprecated_call(match="reset_train_val_dataloaders` has been deprecated in v1.7"): + trainer.reset_train_val_dataloaders() From f1f1aeb7673191acc460f5ad7f289c7d79f6e452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 25 May 2022 19:48:49 +0200 Subject: [PATCH 11/13] Minor changes --- pytorch_lightning/loops/fit_loop.py | 2 +- tests/trainer/logging_/test_eval_loop_logging.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/loops/fit_loop.py b/pytorch_lightning/loops/fit_loop.py index d5e86cccf0994..ab14a7aec23cf 100644 --- a/pytorch_lightning/loops/fit_loop.py +++ b/pytorch_lightning/loops/fit_loop.py @@ -204,8 +204,8 @@ def on_run_start(self) -> None: # type: ignore[override] if not self._iteration_based_training(): self.epoch_progress.current.completed = self.epoch_progress.current.processed - # reset train dataloader and val dataloader self.trainer.reset_train_dataloader(self.trainer.lightning_module) + # reload the evaluation dataloaders too for proper display in the progress bar self.epoch_loop.val_loop._reload_evaluation_dataloaders() data_fetcher_cls = _select_data_fetcher(self.trainer) diff --git a/tests/trainer/logging_/test_eval_loop_logging.py b/tests/trainer/logging_/test_eval_loop_logging.py index 66fd754f00676..9f94b38b5c0f1 100644 --- a/tests/trainer/logging_/test_eval_loop_logging.py +++ b/tests/trainer/logging_/test_eval_loop_logging.py @@ -976,7 +976,7 @@ def test_rich_print_results(inputs, expected): @mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics") -@pytest.mark.parametrize("num_dataloaders", [1, 2]) +@pytest.mark.parametrize("num_dataloaders", (1, 2)) def test_eval_step_logging(mock_log_metrics, tmpdir, num_dataloaders): """Test that eval step during fit/validate/test is updated correctly.""" @@ -993,6 +993,9 @@ def val_dataloader(self): def 
test_dataloader(self): return [super().test_dataloader()] * num_dataloaders + validation_epoch_end = None + test_epoch_end = None + limit_batches = 4 max_epochs = 3 trainer = Trainer( @@ -1003,8 +1006,6 @@ def test_dataloader(self): limit_test_batches=limit_batches, ) model = CustomBoringModel() - model.validation_epoch_end = None - model.test_epoch_end = None trainer.fit(model) trainer.validate(model) From 4eb0badbe057f57dfcb98a93b45e9b1eabcb0d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 25 May 2022 19:59:22 +0200 Subject: [PATCH 12/13] Implement my suggestion --- .../loops/dataloader/evaluation_loop.py | 1 - .../loops/epoch/evaluation_epoch_loop.py | 14 +++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index acc40abff3a46..3c10f979dc912 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ -152,7 +152,6 @@ def advance(self, *args: Any, **kwargs: Any) -> None: if self.num_dataloaders > 1: kwargs["dataloader_idx"] = dataloader_idx dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs) - self.epoch_loop._increment_batch_idx_tracker(dataloader_idx) # store batch level output per dataloader self._outputs.append(dl_outputs) diff --git a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py index 99f871cbf8793..d93faf283cbc1 100644 --- a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py +++ b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py @@ -14,7 +14,7 @@ from collections import OrderedDict from functools import lru_cache -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from deprecate import void from torch.utils.data import DataLoader @@ -49,7 +49,7 @@ def __init__(self) -> None: 
self._dl_max_batches = 0 self._data_fetcher: Optional[AbstractDataFetcher] = None self._dataloader_state_dict: Dict[str, Any] = {} - self._seen_batches: List[int] = [] + self._seen_batches = [0] @property def done(self) -> bool: @@ -152,9 +152,9 @@ def advance( # type: ignore[override] # log batch metrics if not self.trainer.sanity_checking: - dl_idx = kwargs.get("dataloader_idx", 0) - dl_eval_step = self._seen_batches[dl_idx] + self.batch_progress.current.completed - 1 - self.trainer._logger_connector.update_eval_step_metrics(dl_eval_step) + dataloader_idx = kwargs.get("dataloader_idx", 0) + self.trainer._logger_connector.update_eval_step_metrics(self._seen_batches[dataloader_idx]) + self._seen_batches[dataloader_idx] += 1 # track epoch level outputs if self._should_track_batch_outputs_for_epoch_end() and output is not None: @@ -308,7 +308,3 @@ def _should_track_batch_outputs_for_epoch_end(self) -> bool: def _initialize_batch_idx_tracker(self, num_dataloaders: int) -> None: self._seen_batches = [0] * num_dataloaders - - def _increment_batch_idx_tracker(self, dataloader_idx: int) -> None: - if not self.trainer.sanity_checking: - self._seen_batches[dataloader_idx] += self.batch_progress.current.completed From b592a9347b60a6648d4362a54e5d3bba7e71bead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 25 May 2022 20:02:22 +0200 Subject: [PATCH 13/13] Rename and fix CHANGELOG --- CHANGELOG.md | 6 +++--- pytorch_lightning/loops/dataloader/evaluation_loop.py | 4 +--- pytorch_lightning/loops/epoch/evaluation_epoch_loop.py | 10 +++++----- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d61412327e54..bfcf2b252f869 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -235,6 +235,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed issue where the CLI could not pass a `Profiler` to the `Trainer` ([#13084](https://github.com/PyTorchLightning/pytorch-lightning/pull/13084)) +- Fixed logging on step level for eval mode ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184)) + + - @@ -268,9 +271,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed support for `ModelCheckpoint` monitors with dots ([#12783](https://github.com/PyTorchLightning/pytorch-lightning/pull/12783)) -- Fixed logging on step level for eval mode ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184)) - - ## [1.6.1] - 2022-04-13 ### Changed diff --git a/pytorch_lightning/loops/dataloader/evaluation_loop.py b/pytorch_lightning/loops/dataloader/evaluation_loop.py index 3c10f979dc912..85f8ce8a9c47b 100644 --- a/pytorch_lightning/loops/dataloader/evaluation_loop.py +++ b/pytorch_lightning/loops/dataloader/evaluation_loop.py @@ -235,16 +235,14 @@ def _get_max_batches(self) -> List[int]: def _reload_evaluation_dataloaders(self) -> None: """Reloads dataloaders if necessary.""" dataloaders = None - if self.trainer.testing: self.trainer.reset_test_dataloader() dataloaders = self.trainer.test_dataloaders elif self.trainer.val_dataloaders is None or self.trainer._data_connector._should_reload_val_dl: self.trainer.reset_val_dataloader() dataloaders = self.trainer.val_dataloaders - if dataloaders is not None: - self.epoch_loop._initialize_batch_idx_tracker(len(dataloaders)) + self.epoch_loop._reset_dl_batch_idx(len(dataloaders)) def _on_evaluation_start(self, *args: Any, **kwargs: Any) -> None: """Runs ``on_{validation/test}_start`` hooks.""" diff --git a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py index d93faf283cbc1..9317546e0c0ee 100644 --- a/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py +++ b/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py @@ -49,7 +49,7 @@ def 
__init__(self) -> None: self._dl_max_batches = 0 self._data_fetcher: Optional[AbstractDataFetcher] = None self._dataloader_state_dict: Dict[str, Any] = {} - self._seen_batches = [0] + self._dl_batch_idx = [0] @property def done(self) -> bool: @@ -153,8 +153,8 @@ def advance( # type: ignore[override] # log batch metrics if not self.trainer.sanity_checking: dataloader_idx = kwargs.get("dataloader_idx", 0) - self.trainer._logger_connector.update_eval_step_metrics(self._seen_batches[dataloader_idx]) - self._seen_batches[dataloader_idx] += 1 + self.trainer._logger_connector.update_eval_step_metrics(self._dl_batch_idx[dataloader_idx]) + self._dl_batch_idx[dataloader_idx] += 1 # track epoch level outputs if self._should_track_batch_outputs_for_epoch_end() and output is not None: @@ -306,5 +306,5 @@ def _should_track_batch_outputs_for_epoch_end(self) -> bool: return is_overridden("test_epoch_end", model) return is_overridden("validation_epoch_end", model) - def _initialize_batch_idx_tracker(self, num_dataloaders: int) -> None: - self._seen_batches = [0] * num_dataloaders + def _reset_dl_batch_idx(self, num_dataloaders: int) -> None: + self._dl_batch_idx = [0] * num_dataloaders