Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply statsmodels-based ARIMA/VARIMA to new TS #1036

Merged
merged 36 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
91a8372
Added new base class and adjusted ARIMA + tests
piaz97 Jun 22, 2022
8a6bdce
[ARIMA] Added docstrings and tests
piaz97 Jun 22, 2022
76fa8ce
Adapted VARIMA as well + tests
piaz97 Jun 22, 2022
de892db
Keeping training state after forecasting new TS, refactoring
piaz97 Jun 24, 2022
684568f
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jun 24, 2022
97bc489
Updated docstrings
piaz97 Jun 24, 2022
fd36b3a
Merge remote-tracking branch 'origin/feat/apply-arima-to-new-ts' into…
piaz97 Jun 24, 2022
53ce146
Fixed some formatting and added one last test
piaz97 Jun 24, 2022
2c62af2
Restored deleted check
piaz97 Jun 24, 2022
0eceb7a
Fixed a logic issue with current training_series param
piaz97 Jun 27, 2022
a0c1d8f
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jun 27, 2022
c30aee2
Cleaning
piaz97 Jun 27, 2022
83aa416
Merge remote-tracking branch 'origin/feat/apply-arima-to-new-ts' into…
piaz97 Jun 27, 2022
3583acb
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jun 27, 2022
1a96c53
Update darts/models/forecasting/forecasting_model.py
piaz97 Jun 28, 2022
1d0271d
Update darts/models/forecasting/forecasting_model.py
piaz97 Jun 28, 2022
6cb4676
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Jun 29, 2022
ea54a0e
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jul 11, 2022
107dbdd
Added VARIMA prob forecasting support
piaz97 Jul 12, 2022
d0b59fc
Merge remote-tracking branch 'origin/feat/apply-arima-to-new-ts' into…
piaz97 Jul 12, 2022
382c626
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jul 12, 2022
5d54911
Added missing build.gradle
piaz97 Jul 12, 2022
10752f6
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Jul 17, 2022
d132ec8
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Jul 18, 2022
510786c
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Jul 18, 2022
5493c8f
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Jul 18, 2022
8c3e5ed
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jul 21, 2022
3f544e9
Apply suggestions from code review (copy=False)
piaz97 Jul 21, 2022
20c30c8
Replaced ignore_axes -> ignore_axis
piaz97 Jul 21, 2022
d54618d
Added backtest with retrain=False support
piaz97 Jul 21, 2022
dbf72f2
Small fixes
piaz97 Jul 21, 2022
335fb32
Merge branch 'master' into feat/apply-arima-to-new-ts
piaz97 Jul 21, 2022
90883ef
Added some missing values(copy=False)
piaz97 Jul 21, 2022
f4dda3d
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Aug 7, 2022
934b204
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Aug 7, 2022
c712f22
Merge branch 'master' into feat/apply-arima-to-new-ts
hrzn Aug 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions darts/models/forecasting/arima.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
from statsmodels.tsa.arima.model import ARIMA as staARIMA

from darts.logging import get_logger
from darts.models.forecasting.forecasting_model import DualCovariatesForecastingModel
from darts.models.forecasting.forecasting_model import (
StatsmodelsDualCovariatesForecastingModel,
)
from darts.timeseries import TimeSeries

logger = get_logger(__name__)


class ARIMA(DualCovariatesForecastingModel):
class ARIMA(StatsmodelsDualCovariatesForecastingModel):
def __init__(
self,
p: int = 12,
Expand Down Expand Up @@ -66,10 +68,13 @@ def __str__(self):
return f"SARIMA{self.order}x{self.seasonal_order}"

def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):

super()._fit(series, future_covariates)

# storing to restore the statsmodels model results object
self.training_historic_future_covariates = future_covariates

m = staARIMA(
self.training_series.values(),
series.values(),
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
exog=future_covariates.values() if future_covariates else None,
order=self.order,
seasonal_order=self.seasonal_order,
Expand All @@ -82,6 +87,8 @@ def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = Non
def _predict(
self,
n: int,
series: Optional[TimeSeries] = None,
historic_future_covariates: Optional[TimeSeries] = None,
future_covariates: Optional[TimeSeries] = None,
num_samples: int = 1,
) -> TimeSeries:
Expand All @@ -93,7 +100,18 @@ def _predict(
"your model."
)

super()._predict(n, future_covariates, num_samples)
super()._predict(
n, series, historic_future_covariates, future_covariates, num_samples
)

# updating statsmodels results object state with the new ts and covariates
if series is not None:
self.model = self.model.apply(
series.values(),
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
exog=historic_future_covariates.values()
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
if historic_future_covariates
else None,
)

if num_samples == 1:
forecast = self.model.forecast(
Expand All @@ -107,6 +125,15 @@ def _predict(
exog=future_covariates.values() if future_covariates else None,
)

# restoring statsmodels results object state
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
if series is not None:
self.model = self.model.apply(
self._orig_training_series.values(),
exog=self.training_historic_future_covariates.values()
if self.training_historic_future_covariates
else None,
)

return self._build_forecast_series(forecast)

def _is_probabilistic(self) -> bool:
Expand Down
134 changes: 130 additions & 4 deletions darts/models/forecasting/forecasting_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1077,7 +1077,7 @@ class DualCovariatesForecastingModel(ForecastingModel, ABC):
Among other things, it lets Darts forecasting models wrap around statsmodels models
having a `future_covariates` parameter, which corresponds to future-known covariates.

All implementations have to implement the `fit()` and `predict()` methods defined below.
All implementations have to implement the `_fit()` and `_predict()` methods defined below.
"""

_expect_covariate = False
Expand Down Expand Up @@ -1131,6 +1131,7 @@ def predict(
n: int,
future_covariates: Optional[TimeSeries] = None,
num_samples: int = 1,
**kwargs,
) -> TimeSeries:
"""Forecasts values for `n` time steps after the end of the training series.

Expand All @@ -1153,8 +1154,7 @@ def predict(
TimeSeries, a single time series containing the `n` next points after the end of the training series.
"""

if future_covariates is None:
super().predict(n, num_samples)
super().predict(n, num_samples)

if self._expect_covariate and future_covariates is None:
raise_log(
Expand Down Expand Up @@ -1194,7 +1194,7 @@ def predict(
)

return self._predict(
n, future_covariates=future_covariates, num_samples=num_samples
n, future_covariates=future_covariates, num_samples=num_samples, **kwargs
)

@abstractmethod
Expand Down Expand Up @@ -1228,3 +1228,129 @@ def _predict_wrapper(
return self.predict(
n, future_covariates=future_covariates, num_samples=num_samples
)


class StatsmodelsDualCovariatesForecastingModel(DualCovariatesForecastingModel, ABC):
    """The base class for the forecasting models that are not global, but support future covariates, and can
    additionally be applied to new data unrelated to the original series used for fitting the model. Currently,
    all the derived classes wrap statsmodels models.

    All implementations have to implement the `_fit()` and `_predict()` methods.
    """

    def predict(
        self,
        n: int,
        series: Optional[TimeSeries] = None,
        future_covariates: Optional[TimeSeries] = None,
        num_samples: int = 1,
        **kwargs,
    ) -> TimeSeries:
        """If the `series` parameter is not set, forecasts values for `n` time steps after the end of the training
        series. If some future covariates were specified during the training, they must also be specified here.

        If the `series` parameter is set, forecasts values for `n` time steps after the end of the new target
        series. If some future covariates were specified during the training, they must also be specified here.

        Parameters
        ----------
        n
            Forecast horizon - the number of time steps after the end of the series for which to produce predictions.
        series
            Optionally, a new target series whose future values will be predicted. Defaults to `None`, meaning that the
            model will forecast the future value of the training series.
        future_covariates
            The time series of future-known covariates which can be fed as input to the model. It must correspond to
            the covariate time series that has been used with the :func:`fit()` method for training.

            If `series` is not set, it must contain at least the next `n` time steps/indices after the end of the
            training target series. If `series` is set, it must contain at least the time steps/indices corresponding
            to the new target series (historic future covariates), plus the next `n` time steps/indices after the end.
        num_samples
            Number of times a prediction is sampled from a probabilistic model. Should be left set to 1
            for deterministic models.
        **kwargs
            Additional keyword arguments, forwarded unchanged to the parent class' `predict()`.

        Returns
        -------
        TimeSeries, a single time series containing the `n` next points after the end of the training series, or
        after the end of `series` if it was provided.

        Raises
        ------
        ValueError
            If the model was trained with future covariates and none are provided here. When `series` is given
            together with `future_covariates`, the covariates' time span is additionally validated through
            `raise_if_not`.
        """

        if self._expect_covariate and future_covariates is None:
            raise_log(
                ValueError(
                    "The model has been trained with `future_covariates` variable. Some matching "
                    "`future_covariates` variables have to be provided to `predict()`."
                )
            )

        historic_future_covariates = None

        if series is not None and future_covariates:
            raise_if_not(
                future_covariates.start_time() <= series.start_time()
                and future_covariates.end_time() >= series.end_time() + n * series.freq,
                "The provided `future_covariates` related to the new target series must contain at least the same "
                "time steps/indices as the target `series` + `n`.",
                logger,
            )
            # splitting the future covariates into the part aligned with `series` (historic)
            # and the part covering the forecast horizon
            (
                historic_future_covariates,
                future_covariates,
            ) = future_covariates.split_after(series.end_time())

            # in case the future covariates have more values on the left-hand side than we need
            if not series.has_same_time_as(historic_future_covariates):
                historic_future_covariates = historic_future_covariates.slice_intersect(
                    series
                )

        # DualCovariatesForecastingModel performs some checks on self.training_series. We temporarily replace it
        # with the new ts. `_orig_training_series` is also read by the subclasses' `_predict()` to restore the
        # state of the statsmodels results object.
        swap_training_series = series is not None
        if swap_training_series:
            self._orig_training_series = self.training_series
            self.training_series = series

        try:
            return super().predict(
                n=n,
                series=series,
                historic_future_covariates=historic_future_covariates,
                future_covariates=future_covariates,
                num_samples=num_samples,
                **kwargs,
            )
        finally:
            # restoring the original training ts, even if the prediction failed, so that the model
            # is never left pointing at the foreign series
            if swap_training_series:
                self.training_series = self._orig_training_series

    @abstractmethod
    def _predict(
        self,
        n: int,
        series: Optional[TimeSeries] = None,
        historic_future_covariates: Optional[TimeSeries] = None,
        future_covariates: Optional[TimeSeries] = None,
        num_samples: int = 1,
    ) -> TimeSeries:
        """Forecasts values for a certain number of time steps after the end of the series.
        Subclasses of StatsmodelsDualCovariatesForecastingModel must implement the predict logic in this method.
        """
        pass

    def _predict_wrapper(
        self,
        n: int,
        series: TimeSeries,
        past_covariates: Optional[TimeSeries],
        future_covariates: Optional[TimeSeries],
        num_samples: int,
    ) -> TimeSeries:
        """Adapter around `predict()`: forwards `n`, `series`, `future_covariates` and `num_samples`.

        `past_covariates` is accepted but not forwarded to `predict()`.
        """
        return self.predict(
            n=n,
            series=series,
            future_covariates=future_covariates,
            num_samples=num_samples,
        )
74 changes: 63 additions & 11 deletions darts/models/forecasting/varima.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
from statsmodels.tsa.api import VARMAX as staVARMA

from darts.logging import get_logger, raise_if
from darts.models.forecasting.forecasting_model import DualCovariatesForecastingModel
from darts.models.forecasting.forecasting_model import (
StatsmodelsDualCovariatesForecastingModel,
)
from darts.timeseries import TimeSeries

logger = get_logger(__name__)


class VARIMA(DualCovariatesForecastingModel):
class VARIMA(StatsmodelsDualCovariatesForecastingModel):
def __init__(self, p: int = 1, d: int = 0, q: int = 0, trend: Optional[str] = None):
"""VARIMA

Expand Down Expand Up @@ -57,17 +59,24 @@ def __str__(self):
return f"VARMA({self.p},{self.q})"
return f"VARIMA({self.p},{self.d},{self.q})"

def fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):
# for VARIMA we need to process target `series` before calling DualForecastingModels' fit() method
self._last_values = (
series.last_values()
) # needed for back-transformation when d=1
def _differentiate_series(self, series: TimeSeries) -> TimeSeries:
    """Return `series` after applying first-order differencing `self.d` times.

    On every pass the values are differenced with pandas' `diff()`, rows containing NaN
    are dropped, and a new TimeSeries is rebuilt carrying over the static covariates and
    hierarchy of the series from the previous pass. With `self.d == 0` the input is
    returned unchanged.
    """
    differenced = series
    for _pass in range(self.d):
        frame = differenced.pd_dataframe(copy=False)
        differenced = TimeSeries.from_dataframe(
            df=frame.diff().dropna(),
            static_covariates=differenced.static_covariates,
            hierarchy=differenced.hierarchy,
        )
    return differenced

def fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None):
# for VARIMA we need to process the target `series` before calling StatsmodelsDualCovariatesForecastingModel's
# fit() method
self._last_values = (
series.last_values()
) # needed for back-transformation when d=1

series = self._differentiate_series(series)

super().fit(series, future_covariates)

Expand All @@ -77,12 +86,13 @@ def _fit(
self, series: TimeSeries, future_covariates: Optional[TimeSeries] = None
) -> None:
super()._fit(series, future_covariates)
series = self.training_series
future_covariates = future_covariates.values() if future_covariates else None

# storing to restore the statsmodels model results object
self.training_historic_future_covariates = future_covariates

m = staVARMA(
endog=series.pd_dataframe(copy=False),
exog=future_covariates,
exog=future_covariates.values() if future_covariates else None,
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
order=(self.p, self.q),
trend=self.trend,
)
Expand All @@ -92,15 +102,57 @@ def _fit(
def _predict(
self,
n: int,
series: Optional[TimeSeries] = None,
historic_future_covariates: Optional[TimeSeries] = None,
future_covariates: Optional[TimeSeries] = None,
num_samples: int = 1,
) -> TimeSeries:

super()._predict(n, future_covariates, num_samples)
super()._predict(
n, series, historic_future_covariates, future_covariates, num_samples
)

if series is not None:
self._training_last_values = self._last_values
# store new _last_values of the new target series
self._last_values = (
series.last_values()
) # needed for back-transformation when d=1

series = self._differentiate_series(series)

# if the series is differenced, each pass drops one time step (new len = len - self.d), so we have to adjust the future covariates
if historic_future_covariates and self.d > 0:
historic_future_covariates = historic_future_covariates.slice_intersect(
series
)

# updating statsmodels results object state
self.model = self.model.apply(
series.values(),
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
exog=historic_future_covariates.values()
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
if historic_future_covariates
else None,
)

# forecast before restoring the training state
forecast = self.model.forecast(
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
steps=n, exog=future_covariates.values() if future_covariates else None
)

forecast = self._invert_transformation(forecast)

# restoring statsmodels results object state and last values
if series is not None:
self.model = self.model.apply(
self._orig_training_series.values(),
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
exog=self.training_historic_future_covariates.values()
piaz97 marked this conversation as resolved.
Show resolved Hide resolved
if self.training_historic_future_covariates
else None,
)

self._last_values = self._training_last_values

return self._build_forecast_series(np.array(forecast))

def _invert_transformation(self, series_df: pd.DataFrame):
Expand Down
Loading