Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/quantile-multiple-ts-prediction #853

Merged
merged 6 commits into from
Mar 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions darts/models/forecasting/gradient_boosted_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def __init__(
)

def __str__(self):
    """Return a short description of the model.

    The likelihood is included in the description only when one is set
    (i.e. ``self.likelihood`` is truthy).
    """
    if not self.likelihood:
        return f"LGBModel(lags={self.lags})"
    return f"LGBModel(lags={self.lags}, likelihood={self.likelihood})"

def fit(
Expand Down Expand Up @@ -200,27 +202,26 @@ def predict(
"""

if self.likelihood == "quantile":
model_outputs = []
for quantile, fitted in self._model_container.items():
self.model = fitted
prediction = super().predict(
n, series, past_covariates, future_covariates, **kwargs
)
model_outputs.append(prediction.all_values(copy=False))
model_outputs = np.concatenate(model_outputs, axis=-1)
samples = self._sample_quantiles(model_outputs, num_samples)
# build timeseries from samples
return self._ts_like(prediction, samples)
return self._predict_quantiles(
superfun=super().predict,
n=n,
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
**kwargs,
)

if self.likelihood == "poisson":
prediction = super().predict(
n, series, past_covariates, future_covariates, **kwargs
)
samples = self._sample_poisson(
np.array(prediction.all_values(copy=False)), num_samples
return self._predict_poisson(
superfun=super().predict,
n=n,
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
**kwargs,
)
# build timeseries from samples
return self._ts_like(prediction, samples)

return super().predict(
n, series, past_covariates, future_covariates, num_samples, **kwargs
Expand Down
45 changes: 22 additions & 23 deletions darts/models/forecasting/linear_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def __init__(
)

def __str__(self):
    """Return a short description of the model.

    The likelihood is included in the description only when one is set
    (i.e. ``self.likelihood`` is truthy).
    """
    if not self.likelihood:
        return f"LinearRegression(lags={self.lags})"
    return f"LinearRegression(lags={self.lags}, likelihood={self.likelihood})"

def fit(
Expand Down Expand Up @@ -134,9 +136,6 @@ def fit(
"""

if self.likelihood == "quantile":
# empty model container in case of multiple calls to fit, e.g. when backtesting
self._model_container.clear()

# set solver for linear program
if "solver" not in self.kwargs:
# set default fast solver
Expand All @@ -153,6 +152,9 @@ def fit(
# set solver to slow legacy
self.kwargs["solver"] = "interior-point"

# empty model container in case of multiple calls to fit, e.g. when backtesting
self._model_container.clear()

for quantile in self.quantiles:
self.kwargs["quantile"] = quantile
self.model = QuantileRegressor(**self.kwargs)
Expand Down Expand Up @@ -212,30 +214,27 @@ def predict(
"""

if self.likelihood == "quantile":
model_outputs = []
for quantile, fitted in self._model_container.items():
self.model = fitted
prediction = super().predict(
n, series, past_covariates, future_covariates, **kwargs
)
model_outputs.append(prediction.all_values(copy=False))
model_outputs = np.concatenate(model_outputs, axis=-1)
samples = self._sample_quantiles(model_outputs, num_samples)

# build timeseries from samples
return self._ts_like(prediction, samples)
return self._predict_quantiles(
superfun=super().predict,
n=n,
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
**kwargs,
)

elif self.likelihood == "poisson":
prediction = super().predict(
n, series, past_covariates, future_covariates, **kwargs
)
samples = self._sample_poisson(
np.array(prediction.all_values(copy=False)), num_samples
return self._predict_poisson(
superfun=super().predict,
n=n,
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
**kwargs,
)

# build timeseries from samples
return self._ts_like(prediction, samples)

else:
return super().predict(
n, series, past_covariates, future_covariates, num_samples, **kwargs
Expand Down
51 changes: 50 additions & 1 deletion darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import math
from collections import OrderedDict
from typing import List, Optional, Sequence, Tuple, Union
from typing import Callable, List, Optional, Sequence, Tuple, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -646,6 +646,49 @@ def _prepare_quantiles(quantiles):

return quantiles, median_idx

def _predict_quantiles(
    self, superfun: Callable, num_samples: int, **kwargs
) -> Union[TimeSeries, List[TimeSeries]]:
    """Predict with every fitted quantile model and sample from the results.

    Each model stored in ``self._model_container`` is assigned to
    ``self.model`` in turn and ``superfun(**kwargs)`` is called to obtain its
    prediction. For every predicted series, the values of all models are
    concatenated along the last axis and passed to ``self._sample_quantiles``.

    Parameters
    ----------
    superfun
        Callable producing the prediction(s) for the model currently held in
        ``self.model``; it is called with ``**kwargs`` only.
    num_samples
        Forwarded to ``self._sample_quantiles``.
    **kwargs
        Forwarded unchanged to ``superfun``.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        Whatever ``self._build_ts_from_samples`` returns for the sampled values.

    Notes
    -----
    ``self.model`` is left pointing at the last model of the container.
    The container is assumed to be non-empty (i.e. the model was fitted);
    otherwise no prediction exists to build the result from.
    """
    values_per_model = []
    for _, fitted_model in self._model_container.items():
        self.model = fitted_model
        prediction = superfun(**kwargs)
        # a single series comes back bare; wrap it so both cases share one path
        if not isinstance(prediction, Sequence):
            prediction = [prediction]
        values_per_model.append(
            [series_pred.all_values(copy=False) for series_pred in prediction]
        )

    # regroup: one array per series, holding the outputs of all models
    model_outputs = []
    for series_idx in range(len(prediction)):
        outputs_for_series = [values[series_idx] for values in values_per_model]
        model_outputs.append(np.concatenate(outputs_for_series, axis=-1))

    samples = []
    for output in model_outputs:
        samples.append(self._sample_quantiles(output, num_samples))

    # the last prediction serves as the template for the returned series
    return self._build_ts_from_samples(prediction, samples)

def _predict_poisson(
    self, superfun: Callable, num_samples: int, **kwargs
) -> Union[TimeSeries, List[TimeSeries]]:
    """Predict with the fitted model and sample from a Poisson distribution.

    ``superfun(**kwargs)`` is called once; the values of each predicted
    series are handed to ``self._sample_poisson`` together with
    ``num_samples``.

    Parameters
    ----------
    superfun
        Callable producing the prediction(s); it is called with ``**kwargs``
        only.
    num_samples
        Forwarded to ``self._sample_poisson``.
    **kwargs
        Forwarded unchanged to ``superfun``.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        Whatever ``self._build_ts_from_samples`` returns for the sampled values.
    """
    prediction = superfun(**kwargs)
    # a single series comes back bare; wrap it so both cases share one path
    if not isinstance(prediction, Sequence):
        prediction = [prediction]

    samples = []
    for series_pred in prediction:
        predicted_values = np.array(series_pred.all_values(copy=False))
        samples.append(self._sample_poisson(predicted_values, num_samples))

    return self._build_ts_from_samples(prediction, samples)

def _build_ts_from_samples(
    self, prediction: List[TimeSeries], samples: List[np.ndarray]
) -> Union[TimeSeries, List[TimeSeries]]:
    """Pair each prediction with its samples and rebuild series via ``self._ts_like``.

    Parameters
    ----------
    prediction
        Predicted series, used as the first argument of ``self._ts_like``.
    samples
        Sampled values, one array per entry of ``prediction``.

    Returns
    -------
    Union[TimeSeries, List[TimeSeries]]
        The single rebuilt series when exactly one pair was processed,
        otherwise the list of rebuilt series.
    """
    rebuilt = []
    for reference, values in zip(prediction, samples):
        rebuilt.append(self._ts_like(reference, values))

    # NOTE(review): a one-element list input is also unwrapped to a bare
    # series here -- confirm callers do not expect a list back in that case.
    return rebuilt[0] if len(rebuilt) == 1 else rebuilt

def _sample_quantiles(
self, model_output: np.ndarray, num_samples: int
) -> np.ndarray:
Expand All @@ -670,10 +713,16 @@ def _sample_quantiles(
quantile_idxs <= self._median_idx, quantile_idxs, quantile_idxs - 1
)

if num_samples == 1: # return median
return model_output[:, :, [self._median_idx]]

return model_output[:, :, quantile_idxs]

def _sample_poisson(self, model_output: np.ndarray, num_samples: int) -> np.ndarray:
    """Draw Poisson samples using ``model_output`` as the rate parameter.

    Parameters
    ----------
    model_output
        Array passed as ``lam`` to ``self._rng.poisson``. Its first two
        dimensions define the leading dimensions of the sampled array
        (assumes a trailing axis of size 1 so that it broadcasts against the
        sample axis -- TODO confirm).
    num_samples
        Number of samples to draw; must be a positive ``int``.

    Returns
    -------
    np.ndarray
        ``model_output`` itself when ``num_samples == 1`` (the mean is returned
        instead of a random draw); otherwise a float array of shape
        ``(*model_output.shape[:2], num_samples)``.
    """
    # `and` short-circuits, so a non-int `num_samples` (e.g. None or a str)
    # reaches `raise_if_not` as a failed check instead of blowing up with a
    # TypeError while evaluating `num_samples > 0`.
    raise_if_not(isinstance(num_samples, int) and num_samples > 0)

    if num_samples == 1:  # return mean
        return model_output

    sample_shape = (*model_output.shape[:2], num_samples)
    return self._rng.poisson(lam=model_output, size=sample_shape).astype(float)
Expand Down
62 changes: 51 additions & 11 deletions darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import math
from unittest.mock import patch

Expand Down Expand Up @@ -142,6 +143,14 @@ def dummy_timeseries(
return targets, pcovs, fcovs


# helper function used to register LightGBMModel/LinearRegressionModel with likelihood
def partialclass(cls, *args, **kwargs):
    """Return a subclass of ``cls`` whose ``__init__`` has arguments pre-filled.

    The given positional and keyword arguments are bound to ``cls.__init__``
    with ``functools.partialmethod``; any arguments supplied when the returned
    class is instantiated are passed along in addition to them.
    """
    prefilled_init = functools.partialmethod(cls.__init__, *args, **kwargs)

    class NewCls(cls):
        __init__ = prefilled_init

    return NewCls


# Regression models rely on PyTorch for the Datasets
if TORCH_AVAILABLE:

Expand All @@ -152,6 +161,35 @@ class RegressionModelsTestCase(DartsBaseTestClass):
# default regression models
models = [RandomForest, LinearRegressionModel, RegressionModel, LightGBMModel]

# register likelihood regression models
QuantileLightGBMModel = partialclass(
LightGBMModel,
likelihood="quantile",
quantiles=[0.05, 0.5, 0.95],
random_state=42,
)
PoissonLightGBMModel = partialclass(
LightGBMModel, likelihood="poisson", random_state=42
)
QuantileLinearRegressionModel = partialclass(
LinearRegressionModel,
likelihood="quantile",
quantiles=[0.05, 0.5, 0.95],
random_state=42,
)
PoissonLinearRegressionModel = partialclass(
LinearRegressionModel, likelihood="poisson", random_state=42
)
# targets for poisson regression must be positive, so we exclude them for some tests
models.extend(
[
QuantileLightGBMModel,
QuantileLinearRegressionModel,
PoissonLightGBMModel,
PoissonLinearRegressionModel,
]
)

# dummy feature and target TimeSeries instances
target_series, past_covariates, future_covariates = dummy_timeseries(
length=100,
Expand All @@ -163,13 +201,13 @@ class RegressionModelsTestCase(DartsBaseTestClass):
pcov_offset=0,
fcov_offset=0,
)

sine_univariate1 = tg.sine_timeseries(length=100)
sine_univariate2 = tg.sine_timeseries(length=100, value_phase=1.5705)
sine_univariate3 = tg.sine_timeseries(length=100, value_phase=0.78525)
sine_univariate4 = tg.sine_timeseries(length=100, value_phase=0.392625)
sine_univariate5 = tg.sine_timeseries(length=100, value_phase=0.1963125)
sine_univariate6 = tg.sine_timeseries(length=100, value_phase=0.09815625)
# shift sines to positive values for poisson regressors
sine_univariate1 = tg.sine_timeseries(length=100) + 1.5
sine_univariate2 = tg.sine_timeseries(length=100, value_phase=1.5705) + 1.5
sine_univariate3 = tg.sine_timeseries(length=100, value_phase=0.78525) + 1.5
sine_univariate4 = tg.sine_timeseries(length=100, value_phase=0.392625) + 1.5
sine_univariate5 = tg.sine_timeseries(length=100, value_phase=0.1963125) + 1.5
sine_univariate6 = tg.sine_timeseries(length=100, value_phase=0.09815625) + 1.5
sine_multivariate1 = sine_univariate1.stack(sine_univariate2)
sine_multivariate2 = sine_univariate2.stack(sine_univariate3)
sine_multiseries1 = [sine_univariate1, sine_univariate2, sine_univariate3]
Expand All @@ -178,7 +216,6 @@ class RegressionModelsTestCase(DartsBaseTestClass):
lags_1 = {"target": [-3, -2, -1], "past": [-4, -2], "future": [-5, 2]}

def test_model_construction(self):

for model in self.models:
# TESTING SINGLE INT
# testing lags
Expand Down Expand Up @@ -470,6 +507,7 @@ def test_models_runnability(self):

def test_fit(self):
for model in self.models:

# test fitting both on univariate and multivariate timeseries
for series in [self.sine_univariate1, self.sine_multivariate2]:
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -546,7 +584,9 @@ def test_models_accuracy_univariate(self):
# for every model, and different output_chunk_lengths test whether it predicts the univariate time series
# as well as expected
self.helper_test_models_accuracy(
self.sine_univariate1, self.sine_univariate2, [0.03, 1e-13, 1e-13, 0.3]
self.sine_univariate1,
self.sine_univariate2,
[0.03, 1e-13, 1e-13, 0.3, 0.5, 0.8, 0.4, 0.4],
)

def test_models_accuracy_multivariate(self):
Expand All @@ -555,7 +595,7 @@ def test_models_accuracy_multivariate(self):
self.helper_test_models_accuracy(
self.sine_multivariate1,
self.sine_multivariate2,
[0.3, 1e-13, 1e-13, 0.4],
[0.3, 1e-13, 1e-13, 0.4, 0.4, 0.8, 0.4, 0.4],
)

def test_models_accuracy_multiseries_multivariate(self):
Expand All @@ -564,7 +604,7 @@ def test_models_accuracy_multiseries_multivariate(self):
self.helper_test_models_accuracy(
self.sine_multiseries1,
self.sine_multiseries2,
[0.05, 1e-13, 1e-13, 0.05],
[0.05, 1e-13, 1e-13, 0.05, 0.4, 0.8, 0.4, 0.4],
)

def test_historical_forecast(self):
Expand Down