From eda8f94329865df8de692642fdabae97b8e121db Mon Sep 17 00:00:00 2001
From: Julien Herzen
Date: Sat, 19 Mar 2022 21:40:44 +0100
Subject: [PATCH] Feat/more losses (#845)

* added some loss functions

* remove M3 loss which seems bogus

* add unit tests for losses

* correct unit test

* correct unit test

* correct unit test

* added NINF case

* add optional denominator computation in MAPE loss

* better MAPE/MAE split

* simplify MAPE loss

* simplify loss tests

* remove a print statement

* Update darts/utils/losses.py

Co-authored-by: Dennis Bader
---
 darts/tests/utils/test_losses.py | 40 ++++++++++++++
 darts/utils/losses.py            | 95 ++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 darts/tests/utils/test_losses.py
 create mode 100644 darts/utils/losses.py

diff --git a/darts/tests/utils/test_losses.py b/darts/tests/utils/test_losses.py
new file mode 100644
index 0000000000..3374e0bb2e
--- /dev/null
+++ b/darts/tests/utils/test_losses.py
@@ -0,0 +1,40 @@
+import torch
+
+from darts.tests.base_test_class import DartsBaseTestClass
+from darts.utils.losses import MAELoss, MapeLoss, SmapeLoss
+
+
+class LossesTestCase(DartsBaseTestClass):
+    x = torch.tensor([1.1, 2.2, 0.6345, -1.436])
+    y = torch.tensor([1.5, 0.5])
+
+    def helper_test_loss(self, exp_loss_val, exp_w_grad, loss_fn):
+        W = torch.tensor([[0.1, -0.2, 0.3, -0.4], [-0.8, 0.7, -0.6, 0.5]])
+        W.requires_grad = True
+        y_hat = W @ self.x
+        lval = loss_fn(y_hat, self.y)
+        lval.backward()
+
+        self.assertTrue(torch.allclose(lval, exp_loss_val, atol=1e-3))
+        self.assertTrue(torch.allclose(W.grad, exp_w_grad, atol=1e-3))
+
+    def test_smape_loss(self):
+        exp_val = torch.tensor(0.7753)
+        exp_grad = torch.tensor(
+            [[-0.2843, -0.5685, -0.1640, 0.3711], [-0.5859, -1.1718, -0.3380, 0.7649]]
+        )
+        self.helper_test_loss(exp_val, exp_grad, SmapeLoss())
+
+    def test_mape_loss(self):
+        exp_val = torch.tensor(1.2937)
+        exp_grad = torch.tensor(
+            [[-0.3667, -0.7333, -0.2115, 0.4787], [-1.1000, -2.2000, -0.6345, 1.4360]]
+        )
+        self.helper_test_loss(exp_val, exp_grad, MapeLoss())
+
+    def test_mae_loss(self):
+        exp_val = torch.tensor(1.0020)
+        exp_grad = torch.tensor(
+            [[-0.5500, -1.1000, -0.3173, 0.7180], [-0.5500, -1.1000, -0.3173, 0.7180]]
+        )
+        self.helper_test_loss(exp_val, exp_grad, MAELoss())
diff --git a/darts/utils/losses.py b/darts/utils/losses.py
new file mode 100644
index 0000000000..c99b760423
--- /dev/null
+++ b/darts/utils/losses.py
@@ -0,0 +1,95 @@
+"""
+PyTorch Loss Functions
+----------------------
+"""
+# Inspiration: https://github.com/ElementAI/N-BEATS/blob/master/common/torch/losses.py
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+def _divide_no_nan(a, b):
+    """
+    a/b where the resulting NaNs or Infs are replaced by 0.
+    """
+    result = a / b
+    result[result != result] = 0.0  # relies on NaN != NaN to zero out NaNs
+    result[result == np.inf] = 0.0
+    result[result == np.NINF] = 0.0  # np.NINF is negative infinity
+    return result
+
+
+class SmapeLoss(nn.Module):
+    def __init__(self, block_denom_grad: bool = True):
+        """
+        sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Chen and Yang 2004).
+
+        Given a time series of actual values :math:`y_t` and a time series of predicted values
+        :math:`\\hat{y}_t`, both of length :math:`T`, it is computed as
+
+        .. math::
+            \\frac{1}{T}
+            \\sum_{t=1}^{T}{\\frac{\\left| y_t - \\hat{y}_t \\right|}
+            {\\left| y_t \\right| + \\left| \\hat{y}_t \\right|}}.
+
+        The results of divisions yielding NaN or Inf are replaced by 0.
+        Note that we drop the coefficient of 200 usually used for computing sMAPE values,
+        as it impacts only the magnitude of the gradients and not their direction.
+
+        Parameters
+        ----------
+        block_denom_grad
+            Whether to stop the gradient in the denominator
+        """
+        super().__init__()
+        self.block_denom_grad = block_denom_grad
+
+    def forward(self, inpt, tgt):
+        num = torch.abs(tgt - inpt)
+        denom = torch.abs(tgt) + torch.abs(inpt)
+        if self.block_denom_grad:
+            denom = denom.detach()  # treat the denominator as a constant w.r.t. gradients
+        return torch.mean(_divide_no_nan(num, denom))
+
+
+class MapeLoss(nn.Module):
+    def __init__(self):
+        """
+        MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error.
+
+        Given a time series of actual values :math:`y_t` and a time series of predicted values
+        :math:`\\hat{y}_t`, both of length :math:`T`, it is computed as
+
+        .. math::
+            \\frac{1}{T}
+            \\sum_{t=1}^{T}{\\left| \\frac{y_t - \\hat{y}_t}{y_t} \\right|}.
+
+        The results of divisions yielding NaN or Inf are replaced by 0. Note that we drop
+        the coefficient of 100 usually used for computing MAPE values, as it impacts only
+        the magnitude of the gradients and not their direction.
+        """
+        super().__init__()
+
+    def forward(self, inpt, tgt):
+        return torch.mean(torch.abs(_divide_no_nan(tgt - inpt, tgt)))
+
+
+class MAELoss(nn.Module):
+    def __init__(self):
+        """
+        MAE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_error.
+
+        Given a time series of actual values :math:`y_t` and a time series of predicted values
+        :math:`\\hat{y}_t`, both of length :math:`T`, it is computed as
+
+        .. math::
+            \\frac{1}{T}
+            \\sum_{t=1}^{T}{\\left| y_t - \\hat{y}_t \\right|}.
+
+        Note that this is the same as torch.nn.L1Loss.
+        """
+        super().__init__()
+
+    def forward(self, inpt, tgt):
+        return torch.mean(torch.abs(tgt - inpt))
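
Example usage (a minimal sketch, not part of the patch itself): the snippet below exercises the three losses on plain tensors, then shows one of them handed to a darts PyTorch-based forecasting model via its `loss_fn` constructor argument. The `NBEATSModel` choice and its hyperparameters are illustrative assumptions, not prescribed by this patch.

import torch

from darts.models import NBEATSModel
from darts.utils.losses import MAELoss, MapeLoss, SmapeLoss

# Standalone use: each loss is a torch.nn.Module mapping (prediction, target)
# tensors to a scalar that supports backpropagation.
y_hat = torch.tensor([1.2, 0.4], requires_grad=True)
y = torch.tensor([1.5, 0.5])

for loss_fn in (SmapeLoss(), MapeLoss(), MAELoss()):
    loss = loss_fn(y_hat, y)
    loss.backward()  # gradients accumulate in y_hat.grad
    print(type(loss_fn).__name__, float(loss))

# Hedged sketch of model usage: model type and hyperparameters are assumed
# for illustration only.
model = NBEATSModel(
    input_chunk_length=24,
    output_chunk_length=12,
    loss_fn=SmapeLoss(),  # train against sMAPE instead of the default MSE
)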