Skip to content

Commit

Permalink
runs on one gpu but logging (and possibly training) seems to be buggy…
Browse files Browse the repository at this point in the history
… with more gpus
  • Loading branch information
jmercat committed Jul 9, 2024
1 parent 6a2eda5 commit d363fde
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion risk_biased/config/learning_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@
batch_size = 512
num_epochs_cvae = 100
num_epochs_bias = 100
gpus = [0, 1, 2]
gpus = [0]
seed = 0 # Giving seed an integer value sets the seed for the pseudo-random number generators in pytorch, numpy, and python.random
early_stopping = False
accumulate_grad_batches = 1
Expand Down
2 changes: 1 addition & 1 deletion risk_biased/predictors/biased_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def log_with_prefix(

for (metric, value) in log_dict.items():
metric = prefix + metric
self.log(metric, value, on_step=on_step, on_epoch=on_epoch)
self.log(metric, value, on_step=on_step, on_epoch=on_epoch, rank_zero_only=True, sync_dist=True)

def configure_optimizers(
self,
Expand Down
1 change: 1 addition & 0 deletions scripts/scripts_utils/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def get_config(log_dir: str, is_interaction: bool = False) -> Config:
dir=log_dir,
resume="allow",
config=dict(cfg),
group="DDP"
)

# Allow WandB to update the config
Expand Down
2 changes: 1 addition & 1 deletion scripts/scripts_utils/train_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_trainer(cfg: Config, logger: WandbLogger, callbacks: list) -> Trainer:


def main(is_interaction: bool = False):

log_dir = create_log_dir()
cfg = get_config(log_dir, is_interaction)

Expand Down

0 comments on commit d363fde

Please sign in to comment.