
[Fix] Fix the bug that the training log and the evaluation log are mixed #1252

Merged
10 commits merged on Aug 11, 2021
26 changes: 18 additions & 8 deletions mmcv/runner/hooks/evaluation.py
@@ -9,6 +9,7 @@

from mmcv.utils import is_seq_of
from .hook import Hook
from .logger import LoggerHook


class EvalHook(Hook):
@@ -211,19 +212,31 @@ def before_train_epoch(self, runner):

    def after_train_iter(self, runner):
        """Called after every training iter to evaluate the results."""
        if not self.by_epoch:
        if not self.by_epoch and self._should_evaluate(runner):
            # Because the priority of EvalHook is higher than LoggerHook,
            # the training log and the evaluation log get mixed. Therefore,
            # we need to dump the training log and clear it before the
            # evaluation log is generated. In addition, this problem only
            # appears in `IterBasedRunner`, whose `self.by_epoch` is False,
            # because `EpochBasedRunner`, whose `self.by_epoch` is True,
            # calls `_do_evaluate` in the `after_train_epoch` stage, where
            # the training log has already been printed, so it does not
            # cause any problem. More details at
            # https://github.com/open-mmlab/mmsegmentation/issues/694
            for hook in runner._hooks:
                if isinstance(hook, LoggerHook):
                    hook.after_train_iter(runner)
            runner.log_buffer.clear()

            self._do_evaluate(runner)

    def after_train_epoch(self, runner):
        """Called after every training epoch to evaluate the results."""
        if self.by_epoch:
        if self.by_epoch and self._should_evaluate(runner):
            self._do_evaluate(runner)

    def _do_evaluate(self, runner):
        """perform evaluation and save ckpt."""
        if not self._should_evaluate(runner):
            return

        results = self.test_fn(runner.model, self.dataloader)
        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
        key_score = self.evaluate(runner, results)
@@ -418,9 +431,6 @@ def _do_evaluate(self, runner):
                    dist.broadcast(module.running_var, 0)
                    dist.broadcast(module.running_mean, 0)

        if not self._should_evaluate(runner):
            return

        tmpdir = self.tmpdir
        if tmpdir is None:
            tmpdir = osp.join(runner.work_dir, '.eval_hook')
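
To see the ordering problem in isolation, here is a minimal standalone sketch of the pattern this PR adopts in `EvalHook.after_train_iter`: because the evaluation hook runs before the logger hook within the same iteration, it first asks every registered logger hook to flush the buffered training metrics and then clears the log buffer, so the evaluation metrics written afterwards land in their own log line. The `ToyRunner`, `ToyLogBuffer`, `ToyLoggerHook`, and `ToyEvalHook` names below are hypothetical stand-ins rather than mmcv APIs; only the flush-then-clear ordering mirrors the actual change.

# A minimal sketch, assuming nothing from mmcv: the classes below only
# mimic the hook/log-buffer interaction shown in the diff above.


class ToyLogBuffer:
    """Collects scalar outputs until a logger hook flushes them."""

    def __init__(self):
        self.output = {}

    def clear(self):
        self.output = {}


class ToyLoggerHook:
    """Stand-in for LoggerHook: prints whatever is currently buffered."""

    def after_train_iter(self, runner):
        if runner.log_buffer.output:
            print('log:', dict(runner.log_buffer.output))


class ToyEvalHook:
    """Stand-in for EvalHook.after_train_iter as changed by this PR."""

    def after_train_iter(self, runner):
        # ToyEvalHook runs before ToyLoggerHook in the same iteration
        # (higher priority), so first flush the buffered training log ...
        for hook in runner.hooks:
            if isinstance(hook, ToyLoggerHook):
                hook.after_train_iter(runner)
        # ... then clear the buffer so the evaluation metrics written
        # below are not appended to the same log line.
        runner.log_buffer.clear()
        runner.log_buffer.output['mIoU'] = 0.42  # pretend evaluation result


class ToyRunner:
    """Holds the hooks and the shared log buffer, like an mmcv runner."""

    def __init__(self, hooks):
        self.hooks = hooks
        self.log_buffer = ToyLogBuffer()


runner = ToyRunner(hooks=[ToyLoggerHook()])
runner.log_buffer.output['loss'] = 0.1    # written during the training iter
ToyEvalHook().after_train_iter(runner)    # flushes 'loss' on its own line
runner.hooks[0].after_train_iter(runner)  # later logging shows only 'mIoU'

Without the flush-and-clear step, the final print would show both 'loss' and 'mIoU' in a single entry, which is exactly the mixed-log symptom reported in the linked mmsegmentation issue 694.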