Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/metrics #129

Merged
merged 12 commits into from
Jul 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions darts/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,57 @@ def mape(actual_series: TimeSeries,
return 100. * np.mean(np.abs((y_true - y_hat) / y_true))


@multivariate_support
def smape(actual_series: TimeSeries,
          pred_series: TimeSeries,
          intersect: bool = True,
          reduction: Callable[[np.ndarray], float] = np.mean) -> float:
    """ symmetric Mean Absolute Percentage Error (sMAPE).

    Given a time series of actual values :math:`y_t` and a time series of predicted values :math:`\\hat{y}_t`
    both of length :math:`T`, it is a percentage value computed as

    .. math::
        200 \\cdot \\frac{1}{T}
        \\sum_{t=1}^{T}{\\frac{\\left| y_t - \\hat{y}_t \\right|}{\\left| y_t \\right| + \\left| \\hat{y}_t \\right|} }.

    Note that it will raise a `ValueError` if :math:`\\left| y_t \\right| + \\left| \\hat{y}_t \\right| = 0`
    for some :math:`t`. Consider using the Mean Absolute Scaled Error (MASE) in these cases.

    Parameters
    ----------
    actual_series
        The series of actual values
    pred_series
        The series of predicted values
    intersect
        For time series that are overlapping in time without having the same time index, setting `intersect=True`
        will consider the values only over their common time interval (intersection in time).
    reduction
        Function taking as input a np.ndarray and returning a scalar value. This function is used to aggregate
        the metrics of different components in case of multivariate TimeSeries instances.

    Raises
    ------
    ValueError
        If the actual series and the pred series both contain a zero at the same time index.

    Returns
    -------
    float
        The symmetric Mean Absolute Percentage Error (sMAPE)
    """

    y_true, y_hat = _get_values_or_raise(actual_series, pred_series, intersect)
    # The denominator |y_t| + |y_hat_t| is zero only when both values are zero at the same index,
    # so that is the one case rejected here; a zero in just one of the two series is acceptable.
    raise_if_not(np.logical_or(y_true != 0, y_hat != 0).all(),
                 'The actual series and the pred series must not both be zero at the same time index '
                 'to compute the sMAPE.', logger)
    return 200. * np.mean(np.abs((y_true - y_hat) / (np.abs(y_true) + np.abs(y_hat))))


@multivariate_support
def mase(actual_series: TimeSeries,
pred_series: TimeSeries,
insample: TimeSeries,
m: Optional[int] = 1,
intersect: bool = True,
reduction: Callable[[np.ndarray], float] = np.mean) -> float:
Expand All @@ -303,6 +351,9 @@ def mase(actual_series: TimeSeries,
The series of actual values
pred_series
The series of predicted values
insample
The training series used to forecast `pred_series`.
This series serves to compute the scale of the error obtained by a naive forecaster on the training data.
m
Optionally, the seasonality to use for differencing.
`m=1` corresponds to the non-seasonal MASE, whereas `m>1` corresponds to seasonal MASE.
Expand All @@ -315,23 +366,30 @@ def mase(actual_series: TimeSeries,
Function taking as input a np.ndarray and returning a scalar value. This function is used to aggregate
the metrics of different components in case of multivariate TimeSeries instances.

Raises
------
ValueError
If the `insample` series is periodic ( :math:`X_t = X_{t-m}` )

Returns
-------
float
The Mean Absolute Scaled Error (MASE)
"""

raise_if_not(insample.end_time() + insample.freq() == pred_series.start_time(),
"The pred_series must be the forecast of the insample series", logger)
if m is None:
test_season, m = check_seasonality(actual_series)
test_season, m = check_seasonality(insample)
if not test_season:
warn("No seasonality found when computing MASE. Fixing the period to 1.", UserWarning)
m = 1
y_true, y_hat = _get_values_or_raise(actual_series, pred_series, intersect)
errors = np.sum(np.abs(y_true - y_hat))
t = y_true.size
scale = t / (t - m) * np.sum(np.abs(y_true[m:] - y_true[:-m]))
x_t = insample.values()
errors = np.abs(y_true - y_hat)
scale = np.mean(np.abs(x_t[m:] - x_t[:-m]))
raise_if_not(not np.isclose(scale, 0), "cannot use MASE with periodical signals", logger)
return errors / scale
return np.mean(errors / scale)


@multivariate_support
Expand Down
33 changes: 24 additions & 9 deletions darts/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@

class MetricsTestCase(unittest.TestCase):

pd_train = pd.Series(np.sin(np.pi * np.arange(31) / 4) + 1, index=pd.date_range('20121201', '20121231'))
pd_series1 = pd.Series(range(10), index=pd.date_range('20130101', '20130110'))
pd_series2 = pd.Series(np.random.rand(10) * 10 + 1, index=pd.date_range('20130101', '20130110'))
pd_series3 = pd.Series(np.sin(np.pi * np.arange(20) / 4) + 1, index=pd.date_range('20130101', '20130120'))
series_train = TimeSeries.from_series(pd_train)
series1: TimeSeries = TimeSeries.from_series(pd_series1)
pd_series1[:] = pd_series1.mean()
series0: TimeSeries = TimeSeries.from_series(pd_series1)
Expand All @@ -30,15 +32,22 @@ def test_zero(self):
with self.assertRaises(ValueError):
metrics.mape(self.series1, self.series1)

with self.assertRaises(ValueError):
metrics.smape(self.series1, self.series1)

Comment on lines +35 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly to my comment on @pennfranc's PR, I believe that we should split these tests and try to keep a single test for a single assert, or at least a single test for a single failure reason (each test method name should describe what it is testing). The problem with the current tests is that if mape is failing, we have no idea whether it is only mape or whether other methods, like smape, will also fail. I also believe the reason for failing is not very clear each time. Let me know what you think :)

with self.assertRaises(ValueError):
metrics.mape(self.series12, self.series12)

with self.assertRaises(ValueError):
metrics.smape(self.series12, self.series12)

with self.assertRaises(ValueError):
metrics.ope(self.series1 - self.series1.pd_series().mean(), self.series1 - self.series1.pd_series().mean())

def test_same(self):
self.assertEqual(metrics.mape(self.series1 + 1, self.series1 + 1), 0)
self.assertEqual(metrics.mase(self.series1 + 1, self.series1 + 1, 1), 0)
self.assertEqual(metrics.smape(self.series1 + 1, self.series1 + 1), 0)
self.assertEqual(metrics.mase(self.series1 + 1, self.series1 + 1, self.series_train, 1), 0)
self.assertEqual(metrics.marre(self.series1 + 1, self.series1 + 1), 0)
self.assertEqual(metrics.r2_score(self.series1 + 1, self.series1 + 1), 1)
self.assertEqual(metrics.ope(self.series1 + 1, self.series1 + 1), 0)
Expand All @@ -47,15 +56,15 @@ def helper_test_shape_equality(self, metric):
self.assertAlmostEqual(metric(self.series12, self.series21),
metric(self.series1.append(self.series2b), self.series2.append(self.series1b)))

def helper_test_multivariate_duplication_equality(self, metric):
def helper_test_multivariate_duplication_equality(self, metric, **kwargs):
series11 = self.series1.stack(self.series1) + 1
series22 = self.series2.stack(self.series2)
series33 = self.series3.stack(self.series3)
self.assertAlmostEqual(metric(series11, series22), metric(self.series1 + 1, self.series2))
self.assertAlmostEqual(metric(series11, series33), metric(self.series1 + 1, self.series3))
self.assertAlmostEqual(metric(series22, series33), metric(self.series2, self.series3))
self.assertAlmostEqual(metric(series22, series33, reduction=(lambda x: x[0])),
metric(self.series2, self.series3, reduction=(lambda x: x[0])))
self.assertAlmostEqual(metric(series11, series22, **kwargs), metric(self.series1 + 1, self.series2, **kwargs))
self.assertAlmostEqual(metric(series11, series33, **kwargs), metric(self.series1 + 1, self.series3, **kwargs))
self.assertAlmostEqual(metric(series22, series33, **kwargs), metric(self.series2, self.series3, **kwargs))
self.assertAlmostEqual(metric(series22, series33, reduction=(lambda x: x[0]), **kwargs),
metric(self.series2, self.series3, reduction=(lambda x: x[0]), **kwargs))

def test_r2(self):
from sklearn.metrics import r2_score
Expand All @@ -71,7 +80,7 @@ def test_marre(self):

def test_season(self):
with self.assertRaises(ValueError):
metrics.mase(self.series3, self.series3 * 1.3, 8)
metrics.mase(self.series3, self.series3 * 1.3, self.series_train, 8)

def test_mse(self):
self.helper_test_shape_equality(metrics.mse)
Expand All @@ -94,8 +103,14 @@ def test_coefficient_of_variation(self):
def test_mape(self):
self.helper_test_multivariate_duplication_equality(metrics.mape)

def test_smape(self):
self.helper_test_multivariate_duplication_equality(metrics.smape)

def test_mase(self):
self.helper_test_multivariate_duplication_equality(metrics.mase)
self.helper_test_multivariate_duplication_equality(metrics.mase, insample=self.series_train)

with self.assertRaises(ValueError):
metrics.mase(self.series1, self.series2, self.series3, 1)

def test_ope(self):
self.helper_test_multivariate_duplication_equality(metrics.ope)
Expand Down