Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix backtest and residuals for multi series with different number of historical forecasts #2604

Merged
merged 2 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

**Fixed**

- Fixed a bug which raised an error when computing residuals (or backtest with "per time step" metrics) on multiple series with corresponding historical forecasts of different lengths. [#2604](https://github.com/unit8co/darts/pull/2604) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug when using `darts.utils.data.tabularization.create_lagged_component_names()` with target `lags=None`, which did not return any lagged target label component names. [#2576](https://github.com/unit8co/darts/pull/2576) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug where using `num_samples > 1` with a deterministic regression model and the optimized `historical_forecasts()` method did not raise an exception. [#2588](https://github.com/unit8co/darts/pull/2588) by [Antoine Madrona](https://github.com/madtoinou).

Expand Down
15 changes: 13 additions & 2 deletions darts/models/forecasting/forecasting_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,13 +1504,24 @@ def __getitem__(self, index) -> TimeSeries:
kwargs["insample"] = series_gen

errors.append(metric_f(series_gen, forecasts_list, **kwargs))
errors = np.array(errors)
try:
# multiple series can result in different number of forecasts; try if we can run it efficiently
errors = np.array(errors)
is_arr = True
except ValueError:
# otherwise, compute array later
is_arr = False

# get errors for each input `series`
backtest_list = []
for i in range(len(cum_len) - 1):
# errors_series with shape `(n metrics, n series specific historical forecasts, *)`
errors_series = errors[:, cum_len[i] : cum_len[i + 1]]
if is_arr:
errors_series = errors[:, cum_len[i] : cum_len[i + 1]]
else:
errors_series = np.array([
errors_[cum_len[i] : cum_len[i + 1]] for errors_ in errors
])

if reduction is not None:
# shape `(n metrics, n forecasts, *)` -> `(n metrics, *)`
Expand Down
20 changes: 14 additions & 6 deletions darts/tests/models/forecasting/test_residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ def test_output_single_series_hfc_lpo_false(self, config):
@pytest.mark.parametrize(
"config",
itertools.product(
[True, False],
[True, False], # is univariate
[True, False], # same lengths
[
(metrics.err, ((0.0, 0.0), (-1.0, -2.0))),
(metrics.ape, ((0.0, 0.0), (100.0, 100.0))),
Expand All @@ -159,10 +160,12 @@ def test_output_single_series_hfc_lpo_false(self, config):
)
def test_output_multi_series_hfc_lpo_true(self, config):
"""Tests residuals based on historical forecasts generated on multiple `series` with last_points_only=True"""
is_univariate, (metric, score_exp) = config
is_univariate, same_lengths, (metric, score_exp) = config
n_ts = 10
y = ct(value=1.0, length=n_ts)
hfc = ct(value=2.0, length=n_ts)
if not same_lengths:
y = y.append_values([1.0])
if not is_univariate:
y = y.stack(y + 1.0)
hfc = hfc.stack(hfc + 2.0)
Expand All @@ -173,8 +176,9 @@ def test_output_multi_series_hfc_lpo_true(self, config):
# expected residuals values of shape (n time steps, n components, n samples=1) per forecast
scores_exp = []
for i in range(len(hfc)):
num_fcs = len(hfc[i])
scores_exp.append(
np.array([score_exp[i][:n_comps]] * 10).reshape(n_ts, -1, 1)
np.array([score_exp[i][:n_comps]] * num_fcs).reshape(num_fcs, -1, 1)
)

model = NaiveDrift()
Expand Down Expand Up @@ -208,7 +212,8 @@ def test_output_multi_series_hfc_lpo_true(self, config):
@pytest.mark.parametrize(
"config",
itertools.product(
[True, False],
[True, False], # is univariate
[True, False], # same lengths
[
(metrics.err, ((0.0, 0.0), (-1.0, -2.0))),
(metrics.ape, ((0.0, 0.0), (100.0, 100.0))),
Expand All @@ -219,10 +224,12 @@ def test_output_multi_series_hfc_lpo_false(self, config):
"""Tests residuals based on historical forecasts generated on multiple `series` with
last_points_only=False.
"""
is_univariate, (metric, score_exp) = config
is_univariate, same_lengths, (metric, score_exp) = config
n_ts = 10
y = ct(value=1.0, length=n_ts)
hfc = ct(value=2.0, length=n_ts)
if not same_lengths:
y = y.append_values([1.0])
if not is_univariate:
y = y.stack(y + 1.0)
hfc = hfc.stack(hfc + 2.0)
Expand All @@ -233,8 +240,9 @@ def test_output_multi_series_hfc_lpo_false(self, config):
# expected residuals values of shape (n time steps, n components, n samples=1) per forecast
scores_exp = []
for i in range(len(hfc)):
num_fcs = len(hfc[i][0])
scores_exp.append(
np.array([score_exp[i][:n_comps]] * 10).reshape(n_ts, -1, 1)
np.array([score_exp[i][:n_comps]] * num_fcs).reshape(num_fcs, -1, 1)
)

model = NaiveDrift()
Expand Down
Loading