feat(train): add optional accumulate_grad_batches config param (#306)
* feat(train): add accumulate_grad_batches hparam

Add an `accumulate_grad_batches` param to the `train` section
of the config to enable gradient accumulation. The optimizers now
step once every `accumulate_grad_batches` batches, accumulating
gradients in between; the default value of 1 keeps existing
configs working unchanged (see the sketch below).

* fix(train): normalize loss when using gradient accumulation
guranon authored Apr 13, 2023
1 parent 0f6794a commit 1172b23
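The loss normalization in the second commit keeps the accumulated gradient on the same scale as a single optimizer step: gradients from each micro-batch are summed across `accumulate_grad_batches` backward passes, so each loss is divided by that count first. A minimal standalone sketch of this pattern in plain PyTorch (not the repository's Lightning module; the model, data, and batch sizes are made up for illustration, and it assumes equal micro-batch sizes with a mean-reduced loss):

```python
import torch

torch.manual_seed(0)
model = torch.nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
accumulate_grad_batches = 4
micro_batch = 8

# Toy data: 4 micro-batches of 8 samples each.
x = torch.randn(micro_batch * accumulate_grad_batches, 4)
y = torch.randn(micro_batch * accumulate_grad_batches, 1)

for batch_idx in range(accumulate_grad_batches):
    xb = x[batch_idx * micro_batch : (batch_idx + 1) * micro_batch]
    yb = y[batch_idx * micro_batch : (batch_idx + 1) * micro_batch]
    loss = torch.nn.functional.mse_loss(model(xb), yb)
    # Normalize before accumulating so the summed gradient matches the
    # gradient of one mean-reduced loss over all 32 samples.
    (loss / accumulate_grad_batches).backward()
    if (batch_idx + 1) % accumulate_grad_batches == 0:
        opt.step()       # one weight update for the whole accumulation window
        opt.zero_grad()  # start the next window with clean gradients
```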
1 changed file with 10 additions and 6 deletions: src/so_vits_svc_fork/train.py
@@ -392,10 +392,13 @@ def training_step(self, batch: dict[str, torch.Tensor], batch_idx: int) -> None:
             }
         )
 
+        accumulate_grad_batches = self.hparams.train.get("accumulate_grad_batches", 1)
+        should_update = (batch_idx + 1) % accumulate_grad_batches == 0
         # optimizer
-        optim_g.zero_grad()
-        self.manual_backward(loss_gen_all)
-        optim_g.step()
+        self.manual_backward(loss_gen_all / accumulate_grad_batches)
+        if should_update:
+            optim_g.step()
+            optim_g.zero_grad()
         self.untoggle_optimizer(optim_g)
 
         # Discriminator
@@ -417,9 +420,10 @@ def training_step(self, batch: dict[str, torch.Tensor], batch_idx: int) -> None:
         )
 
         # optimizer
-        optim_d.zero_grad()
-        self.manual_backward(loss_disc_all)
-        optim_d.step()
+        self.manual_backward(loss_disc_all / accumulate_grad_batches)
+        if should_update:
+            optim_d.step()
+            optim_d.zero_grad()
         self.untoggle_optimizer(optim_d)
 
         # end of epoch
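For reference, a short sketch of how the new key behaves from the config side, assuming only that the `train` section is exposed as a dict-like object (the `train_cfg` name and the `batch_size` key are illustrative, not taken from the repository):

```python
# Hypothetical `train` config section; only `accumulate_grad_batches` is the
# key introduced by this commit.
train_cfg = {"batch_size": 16}

# Same backward-compatible lookup as in the diff: a missing key falls back to 1,
# i.e. one optimizer step per batch, exactly as before this change.
accumulate_grad_batches = train_cfg.get("accumulate_grad_batches", 1)
assert accumulate_grad_batches == 1

# Opting in: gradients from 4 consecutive batches are accumulated before each
# optimizer step, for an effective batch size of 16 * 4 = 64.
train_cfg["accumulate_grad_batches"] = 4
effective_batch_size = train_cfg["batch_size"] * train_cfg["accumulate_grad_batches"]
print(effective_batch_size)  # 64
```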
