Visual-Behavior · Data-Iab · Mar 13, 2023 · Mar 12, 2023 · Mar 12, 2023 · Mar 12, 2023
diff --git a/alonet/callbacks/metrics_callback.py b/alonet/callbacks/metrics_callback.py
@@ -121,7 +121,6 @@ def _process_train_metrics(self, outputs):
     def _log_train_metrics(self, pl_module, trainer):
         # Log the results
         for key in self.metrics:
-
             if "histogram" in key and len(self.metrics[key]) > 0:
                 hist = torch.cat(self.metrics[key]).to("cpu")
                 log_hist(trainer, f"train/{key}", hist)
@@ -138,7 +137,7 @@ def _log_train_metrics(self, pl_module, trainer):
                 )
 
     @rank_zero_only
-    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
+    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
         """Method called after each training batch. This class is a pytorch lightning callback, therefore
         this method will be automatically called by pytorch lightning.
 
@@ -176,7 +175,8 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, data
             )
 
         self._process_train_metrics(outputs)
-        if trainer.fit_loop.should_accumulate() or (trainer.global_step + 1) % trainer.log_every_n_steps != 0:
+        should_accumulate = trainer.fit_loop._should_accumulate()
+        if should_accumulate or (trainer.global_step + 1) % trainer.log_every_n_steps != 0:
             return
 
         self._log_train_metrics(pl_module, trainer)

diff --git a/alonet/common/pl_helpers.py b/alonet/common/pl_helpers.py
@@ -169,7 +169,6 @@ def load_training(
     weights_path = getattr(args, "weights", None) if args is not None else None
     no_run_id = args.no_run_id if no_run_id is None and "no_run_id" in args else no_run_id
 
-
     if "weights" in kwargs and kwargs["weights"] is not None:  # Highest priority
         weights_path = kwargs["weights"]
 
@@ -273,13 +272,12 @@ def run_pl_training(
     # Init trainer and run training
     trainer = pl.Trainer.from_argparse_args(
         args,
-        # default_root_dir=expe_dir,
-        gpus=-1 if not args.cpu else 0,
+        accelerator="gpu" if not args.cpu else "cpu",
         auto_select_gpus=not args.cpu,
         logger=logger,
         callbacks=callbacks,
         resume_from_checkpoint=resume_from_checkpoint,
-        accelerator=None if torch.cuda.device_count() <= 1 else "ddp",
+        strategy="ddp" if torch.cuda.device_count() >= 2 else None,
         **pl_trainer,
     )