From 0e0aff0bbfeb3e67175ccdab391d539284831ae2 Mon Sep 17 00:00:00 2001
From: Parth Sarthi <39787228+parthsarthi03@users.noreply.github.com>
Date: Wed, 12 Jun 2024 13:39:17 -0700
Subject: [PATCH] Fix WandBLogger to allow resuming runs with updated config values (#1081)

---
 torchtune/utils/metric_logging.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/torchtune/utils/metric_logging.py b/torchtune/utils/metric_logging.py
index d67f9c8bd9..59c9f83d34 100644
--- a/torchtune/utils/metric_logging.py
+++ b/torchtune/utils/metric_logging.py
@@ -203,6 +203,8 @@ def __init__(
         self._wandb.define_metric("global_step")
         self._wandb.define_metric("*", step_metric="global_step", step_sync=True)
 
+        self.config_allow_val_change = kwargs.get("allow_val_change", False)
+
     def log_config(self, config: DictConfig) -> None:
         """Saves the config locally and also logs the config to W&B. The config is
         stored in the same directory as the checkpoint. You can
@@ -214,7 +216,9 @@ def log_config(self, config: DictConfig) -> None:
         """
         if self._wandb.run:
             resolved = OmegaConf.to_container(config, resolve=True)
-            self._wandb.config.update(resolved)
+            self._wandb.config.update(
+                resolved, allow_val_change=self.config_allow_val_change
+            )
             try:
                 output_config_fname = Path(
                     os.path.join(
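
With this change, a W&B run can be resumed even when config values differ from the original run: the logger reads allow_val_change from its keyword arguments and passes it through to wandb.config.update(), which otherwise raises an error when an already-logged config key receives a new value. Below is a minimal usage sketch, not part of the patch, assuming WandBLogger forwards extra keyword arguments to wandb.init; the project name and run id are hypothetical placeholders.

    from torchtune.utils.metric_logging import WandBLogger

    # id and resume are standard wandb.init arguments, assumed here to be
    # forwarded via **kwargs; allow_val_change is also read by this patch.
    logger = WandBLogger(
        project="my_project",   # hypothetical project name
        id="abc123",            # hypothetical id of the run to resume
        resume="must",          # require W&B to resume that exact run
        allow_val_change=True,  # consumed by this patch and applied in
                                # log_config() via wandb.config.update()
    )

The default remains False, preserving the previous behavior of failing loudly when a resumed run changes its config without opting in.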