This repository has been archived by the owner on Jul 1, 2024. It is now read-only.

[classy_vision] Revamp logging #478

Closed · wants to merge 1 commit
37 changes: 11 additions & 26 deletions classy_vision/hooks/loss_lr_meter_logging_hook.py
@@ -18,7 +18,6 @@ class LossLrMeterLoggingHook(ClassyHook):
     Logs the loss, optimizer LR, and meters. Logs at the end of a phase.
     """
 
-    on_start = ClassyHook._noop
     on_phase_start = ClassyHook._noop
     on_end = ClassyHook._noop
 
@@ -35,6 +34,9 @@ def __init__(self, log_freq: Optional[int] = None) -> None:
         ), "log_freq must be an int or None"
         self.log_freq: Optional[int] = log_freq
 
+    def on_start(self, task) -> None:
+        logging.info(f"Starting training. Task: {task}")
+
     def on_phase_end(self, task) -> None:
         """
         Log the loss, optimizer LR, and meters for the phase.
@@ -45,10 +47,7 @@ def on_phase_end(self, task) -> None:
         # do not explicitly state this since it is possible for a
         # trainer to implement an unsynced end of phase meter or
         # for meters to not provide a sync function.
-        logging.info("End of phase metric values:")
-        self._log_loss_meters(task)
-        if task.train:
-            self._log_lr(task)
+        self._log_loss_meters(task, prefix="Synced meters: ")
 
     def on_step(self, task) -> None:
         """
@@ -58,18 +57,9 @@ def on_step(self, task) -> None:
             return
         batches = len(task.losses)
         if batches and batches % self.log_freq == 0:
-            self._log_lr(task)
-            logging.info("Local unsynced metric values:")
-            self._log_loss_meters(task)
-
-    def _log_lr(self, task) -> None:
-        """
-        Compute and log the optimizer LR.
-        """
-        optimizer_lr = task.optimizer.parameters.lr
-        logging.info("Learning Rate: {}\n".format(optimizer_lr))
+            self._log_loss_meters(task, prefix="Approximate meter values: ")
 
-    def _log_loss_meters(self, task) -> None:
+    def _log_loss_meters(self, task, prefix="") -> None:
         """
         Compute and log the loss and meters.
         """
@@ -80,14 +70,9 @@ def _log_loss_meters(self, task) -> None:
 
         # Loss for the phase
         loss = sum(task.losses) / (batches * task.get_batchsize_per_replica())
+        phase_pct = batches / task.num_batches_per_phase
 
-        log_strs = [
-            "Rank: {}, {} phase: {}, processed batches: {}".format(
-                get_rank(), phase_type, phase_type_idx, batches
-            ),
-            "{} loss: {}".format(phase_type, loss),
-            "Meters:",
-        ]
-        for meter in task.meters:
-            log_strs.append("{}".format(meter))
-        logging.info("\n".join(log_strs))
+        logging.info(
+            f"{prefix}{phase_type} phase {phase_type_idx} ({phase_pct*100:.2f}% done), "
+            f"loss: {loss:.4f}, meters: {task.meters}"
+        )
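
For reviewers: every periodic message now funnels through the single _log_loss_meters call, so the format above is exactly what lands in the logs. A minimal sketch of the message it produces, using invented values (a train phase with index 3, 50 of 200 batches done, loss 0.8312, one fake meter string) rather than a real task:

    # Sketch only: all values below are made up for illustration.
    prefix, phase_type, phase_type_idx = "Approximate meter values: ", "train", 3
    phase_pct, loss, meters = 50 / 200, 0.8312, ["accuracy_meter(top_1=0.412000)"]
    print(
        f"{prefix}{phase_type} phase {phase_type_idx} ({phase_pct*100:.2f}% done), "
        f"loss: {loss:.4f}, meters: {meters}"
    )
    # -> Approximate meter values: train phase 3 (25.00% done), loss: 0.8312, meters: ['accuracy_meter(top_1=0.412000)']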
5 changes: 2 additions & 3 deletions classy_vision/hooks/model_complexity_hook.py
@@ -53,10 +53,9 @@ def on_start(self, task) -> None:
             )
         except NotImplementedError:
             logging.warning(
-                """Model contains unsupported modules:
-            Could not compute FLOPs for model forward pass. Exception:""",
-                exc_info=True,
+                "Model contains unsupported modules, could not compute FLOPs for model forward pass."
             )
+            logging.debug("Exception:", exc_info=True)
         try:
             self.num_activations = compute_activations(
                 task.base_model,
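
For context: the change splits one noisy record into two, so the summary stays visible at the default level while the traceback only appears when DEBUG is enabled. A minimal self-contained sketch of the pattern (compute_flops here is a stub standing in for the hook's real FLOPs computation):

    import logging

    def compute_flops(model):
        # Stub: assume the real computation raises on unsupported modules.
        raise NotImplementedError("unsupported module")

    try:
        flops = compute_flops(model=None)
    except NotImplementedError:
        # Summary at WARNING; the full traceback is demoted to DEBUG.
        logging.warning(
            "Model contains unsupported modules, could not compute FLOPs for model forward pass."
        )
        logging.debug("Exception:", exc_info=True)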
3 changes: 0 additions & 3 deletions classy_vision/meters/accuracy_meter.py
@@ -126,9 +126,6 @@ def set_classy_state(self, state):
         self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
         self._curr_sample_count = state["curr_sample_count"].clone()
 
-    def __repr__(self):
-        return repr({"name": self.name, "value": self.value})
-
     def update(self, model_output, target, **kwargs):
         """
         args:
4 changes: 4 additions & 0 deletions classy_vision/meters/classy_meter.py
@@ -114,3 +114,7 @@ def set_classy_state(self, state: Dict[str, Any]) -> None:
         This is used to load the state of the meter from a checkpoint.
         """
         raise NotImplementedError
+
+    def __repr__(self):
+        values = ",".join([f"{key}={value:.6f}" for key, value in self.value.items()])
+        return f"{self.name}_meter({values})"
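
For context: the identical __repr__ overrides removed from accuracy_meter.py above and from precision_meter.py, recall_meter.py, and video_meter.py below are replaced by this shared base-class implementation. A minimal sketch of what it renders, assuming a meter whose value property returns a dict of floats (as the accuracy meter's does):

    class FakeAccuracyMeter:
        # Hypothetical stand-in for a ClassyMeter subclass.
        name = "accuracy"

        @property
        def value(self):
            return {"top_1": 0.412, "top_5": 0.785}

        def __repr__(self):
            # Same body as the new ClassyMeter.__repr__ above.
            values = ",".join([f"{key}={value:.6f}" for key, value in self.value.items()])
            return f"{self.name}_meter({values})"

    print(repr(FakeAccuracyMeter()))  # -> accuracy_meter(top_1=0.412000,top_5=0.785000)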
3 changes: 0 additions & 3 deletions classy_vision/meters/precision_meter.py
@@ -127,9 +127,6 @@ def set_classy_state(self, state):
         self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
         self._curr_sample_count = state["curr_sample_count"].clone()
 
-    def __repr__(self):
-        return repr({"name": self.name, "value": self.value})
-
     def update(self, model_output, target, **kwargs):
         """
         args:
3 changes: 0 additions & 3 deletions classy_vision/meters/recall_meter.py
@@ -126,9 +126,6 @@ def set_classy_state(self, state):
         self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
         self._curr_correct_targets = state["curr_correct_targets"].clone()
 
-    def __repr__(self):
-        return repr({"name": self.name, "value": self.value})
-
     def update(self, model_output, target, **kwargs):
         """
         args:
3 changes: 0 additions & 3 deletions classy_vision/meters/video_meter.py
@@ -76,9 +76,6 @@ def set_classy_state(self, state):
         self.reset()
         self.meter.set_classy_state(state["meter_state"])
 
-    def __repr__(self):
-        return repr({"name": self.name, "value": self.value})
-
     def update(self, model_output, target, is_train, **kwargs):
         """Updates any internal state of meter with new model output and target.
 
18 changes: 14 additions & 4 deletions classy_vision/tasks/classification_task.py
@@ -6,6 +6,7 @@
 
 import copy
 import enum
+import json
 import logging
 import math
 import time
@@ -413,6 +414,10 @@ def from_config(cls, config: Dict[str, Any]) -> "ClassificationTask":
         for phase_type in phase_types:
             task.set_dataset(datasets[phase_type], phase_type)
 
+        # NOTE: this is a private member and only meant to be used for
+        # logging/debugging purposes. See __repr__ implementation
+        task._config = config
+
         return task
 
     @property
@@ -854,7 +859,7 @@ def advance_phase(self):
         resets counters, shuffles dataset, rebuilds iterators, and
         sets the train / test state for phase.
         """
-        logging.info("Advancing phase")
+        logging.debug("Advancing phase")
         # Reset meters for next phase / epoch
         for meter in self.meters:
             meter.reset()
@@ -893,7 +898,7 @@ def _recreate_data_loader_from_dataset(self, phase_type=None):
         if phase_type is None:
             phase_type = self.phase_type
 
-        logging.info("Recreating data loader for new phase")
+        logging.debug("Recreating data loader for new phase")
         num_workers = 0
         if hasattr(self.dataloaders[phase_type], "num_workers"):
             num_workers = self.dataloaders[phase_type].num_workers
@@ -979,10 +984,10 @@ def on_phase_start(self):
     def on_phase_end(self):
         self.log_phase_end("train")
 
-        logging.info("Syncing meters on phase end...")
+        logging.debug("Syncing meters on phase end...")
         for meter in self.meters:
             meter.sync_state()
-        logging.info("...meters synced")
+        logging.debug("...meters synced")
         barrier()
 
         for hook in self.hooks:
@@ -1016,3 +1021,8 @@ def log_phase_end(self, tag):
                 "im_per_sec": im_per_sec,
             }
         )
+
+    def __repr__(self):
+        if hasattr(self, "_config"):
+            return json.dumps(self._config, indent=4)
+        return super().__repr__()
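
For context: together with the new on_start hook in loss_lr_meter_logging_hook.py, repr(task) now dumps the stashed config as pretty-printed JSON at the start of training. A minimal sketch with a toy config (FakeTask and its two config keys are invented for illustration):

    import json

    class FakeTask:
        def __init__(self, config):
            self._config = config

        def __repr__(self):
            # Same body as the new ClassificationTask.__repr__ above.
            if hasattr(self, "_config"):
                return json.dumps(self._config, indent=4)
            return super().__repr__()

    task = FakeTask({"name": "classification_task", "num_epochs": 90})
    print(f"Starting training. Task: {task}")  # what the new on_start hook logs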