Correctly calculate distributed loss average (#269)
Co-authored-by: Abhishek Das <das.abhshk@gmail.com>
Johannes Klicpera and abhshkdz authored Sep 18, 2021
1 parent d6529b8 commit ef98b27
Showing 2 changed files with 27 additions and 1 deletion.
ocpmodels/modules/loss.py (24 additions, 0 deletions)
@@ -1,6 +1,8 @@
 import torch
 from torch import nn
 
+from ocpmodels.common import distutils
+
 
 class L2MAELoss(nn.Module):
     def __init__(self, reduction="mean"):
@@ -14,3 +16,25 @@ def forward(self, input: torch.Tensor, target: torch.Tensor):
             return torch.mean(dists)
         elif self.reduction == "sum":
             return torch.sum(dists)
+
+
+class DDPLoss(nn.Module):
+    def __init__(self, loss_fn, reduction="mean"):
+        super().__init__()
+        self.loss_fn = loss_fn
+        self.loss_fn.reduction = "sum"
+        self.reduction = reduction
+        assert reduction in ["mean", "sum"]
+
+    def forward(self, input: torch.Tensor, target: torch.Tensor):
+        loss = self.loss_fn(input, target)
+        if self.reduction == "mean":
+            num_samples = input.shape[0]
+            num_samples = distutils.all_reduce(
+                num_samples, device=input.device
+            )
+            # Multiply by world size since gradients are averaged
+            # across DDP replicas
+            return loss * distutils.get_world_size() / num_samples
+        else:
+            return loss
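
Why this is correct: DDP averages gradients across replicas, which is equivalent to dividing the summed per-rank results by world_size. Returning sum * world_size / num_samples from each rank therefore recovers the true global mean even when ranks hold unequal batch sizes, which a naive per-rank mean does not. A minimal sketch of the arithmetic (plain Python, no process group; the two-rank numbers below are made up for illustration):

    # Hypothetical per-rank summed losses and batch sizes.
    rank_loss_sums = [6.0, 4.0]  # sum of per-sample losses on each rank
    rank_batch_sizes = [3, 1]    # ranks may see different numbers of samples

    world_size = len(rank_loss_sums)
    num_samples = sum(rank_batch_sizes)  # what the all_reduce computes globally

    # What each rank returns from DDPLoss.forward with reduction="mean":
    per_rank = [s * world_size / num_samples for s in rank_loss_sums]

    # DDP's gradient averaging then divides the result by world_size:
    ddp_result = sum(per_rank) / world_size       # 2.5
    true_mean = sum(rank_loss_sums) / num_samples # 10 / 4 = 2.5
    assert ddp_result == true_mean

    # A naive per-rank mean is biased whenever batch sizes differ:
    naive = sum(s / n for s, n in zip(rank_loss_sums, rank_batch_sizes)) / world_size
    assert naive == 3.0  # != 2.5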
ocpmodels/trainers/base_trainer.py (3 additions, 1 deletion)
@@ -40,7 +40,7 @@
 from ocpmodels.modules.exponential_moving_average import (
     ExponentialMovingAverage,
 )
-from ocpmodels.modules.loss import L2MAELoss
+from ocpmodels.modules.loss import DDPLoss, L2MAELoss
 from ocpmodels.modules.normalizer import Normalizer
 from ocpmodels.modules.scheduler import LRScheduler
 
@@ -366,6 +366,8 @@ def load_loss(self):
                 raise NotImplementedError(
                     f"Unknown loss function name: {loss_name}"
                 )
+            if distutils.initialized():
+                self.loss_fn[loss] = DDPLoss(self.loss_fn[loss])
 
     def load_optimizer(self):
         optimizer = self.config["optim"].get("optimizer", "AdamW")
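
For illustration, after this change a trainer running under an initialized process group uses the wrapped criterion transparently. A hedged usage sketch (nn.L1Loss stands in for whatever loss load_loss selected from the config; a distributed process group is assumed to be initialized so the all_reduce inside DDPLoss spans all ranks):

    import torch
    from torch import nn

    from ocpmodels.modules.loss import DDPLoss

    # DDPLoss forces the wrapped criterion to reduction="sum" and applies
    # the world_size / num_samples correction itself in forward().
    criterion = DDPLoss(nn.L1Loss())

    pred = torch.randn(8, 3, requires_grad=True)  # e.g. force predictions
    target = torch.randn(8, 3)
    loss = criterion(pred, target)  # global mean over samples on all replicas
    loss.backward()

Guarding the wrapping with distutils.initialized() keeps single-process runs on the unmodified loss, so only distributed training pays for the extra all_reduce.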
