Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/static covs regression #1412

Merged
merged 19 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,68 @@ def _create_lagged_data(
multi_models=self.multi_models,
)

training_samples = self._add_static_covariates(target_series, training_samples)

return training_samples, training_labels

def _add_static_covariates(self, series, features):
    """
    Add static covariates to the features. Accounts for series with potentially different static covariates
    by padding with 0 to accommodate for the maximum number of available static_covariates in any of the given
    series in the sequence. If no static covariates are provided for a given series, its corresponding features
    are padded with 0.

    Parameters
    ----------
    series
        A single series or a sequence of series; it is normalized to a sequence with ``series2seq``.
    features
        2-D array of features. Rows are assumed to be grouped per series, in the order of `series`, with
        the same number of rows for every series (``features.shape[0] // len(series)``).

    Returns
    -------
    np.ndarray
        `features` with the static covariate columns appended on the right. When no series has static
        covariates, `features` is returned unchanged, unless the underlying model was already fitted on
        more columns, in which case the missing columns are filled with 0.
    """

    series = series2seq(series)
    # number of feature rows contributed by each series (assumed identical for all series)
    rows_per_series = features.shape[0] // len(series)

    # flatten the static covariates of every series into one row; `None` marks a series without any
    flat_covs = [
        # reshape with order="F" to ensure that the covariates are read column wise
        ts.static_covariates_values(copy=False).reshape(1, -1, order="F")
        if ts.static_covariates is not None
        else None
        for ts in series
    ]
    widths = [0 if covs is None else covs.shape[1] for covs in flat_covs]
    max_width = max(widths)

    if max_width == 0:
        n_fitted = getattr(self.model, "n_features_in_", None)
        if n_fitted is not None and n_fitted > features.shape[1]:
            # for when series in prediction do not have static covariates but some of the training series did.
            # The padding must span every row of `features` (not only the rows of a single series),
            # otherwise the concatenation fails as soon as more than one series is given.
            pad_zeros = np.zeros((features.shape[0], n_fitted - features.shape[1]))
            return np.concatenate([features, pad_zeros], axis=1)
        return features

    # at least one series in the sequence has static covariates:
    # build one block of repeated (zero-padded) static covariates per series
    blocks = []
    for covs, width in zip(flat_covs, widths):
        pad_zeros = np.zeros((1, max_width - width))
        row = np.concatenate((covs, pad_zeros), axis=1) if width > 0 else pad_zeros
        blocks.append(np.tile(row, reps=(rows_per_series, 1)))
    static_covs = np.concatenate(blocks, axis=0)

    # concatenate static covariates to features
    return np.concatenate([features, static_covs], axis=1)

def _fit_model(
self,
target_series,
Expand Down Expand Up @@ -655,6 +715,8 @@ def predict(

# concatenate retrieved lags
X = np.concatenate(np_X, axis=1)
X = self._add_static_covariates(series, X)

# X has shape (n_series * n_samples, n_regression_features)
prediction = self._predict_and_sample(X, num_samples, **kwargs)
# prediction shape (n_series * n_samples, output_chunk_length, n_components)
Expand Down Expand Up @@ -687,6 +749,10 @@ def _predict_and_sample(
def __str__(self):
return self.model.__str__()

@staticmethod
def _supports_static_covariates() -> bool:
return True


class _LikelihoodMixin:
"""
Expand Down
79 changes: 79 additions & 0 deletions darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
import pandas as pd
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

import darts
from darts import TimeSeries
from darts.dataprocessing.encoders import (
FutureCyclicEncoder,
PastDatetimeAttributeEncoder,
)
from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.logging import get_logger
from darts.metrics import mae, rmse
from darts.models import (
Expand Down Expand Up @@ -638,6 +640,83 @@ def test_prediction_data_creation(self):
[44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0],
)

def test_static_covs_addition(self):
    """
    Check the width of the feature matrix once static covariates are appended: for a series without
    static covariates, for a mix of series with differently sized static covariates, and for a series
    without static covariates after the model was fitted on series that had some.
    """
    ref_series = tg.linear_timeseries(length=10)

    def build_series(factor=None, static_covs=None):
        # three identical components derived from the reference series, optionally rescaled
        base_values = ref_series.values()
        if factor is not None:
            base_values = base_values * factor
        extra_kwargs = {}
        if static_covs is not None:
            extra_kwargs["static_covariates"] = static_covs
        return TimeSeries.from_times_and_values(
            times=ref_series.time_index,
            values=np.concatenate([base_values] * 3, axis=1),
            columns=["comp1", "comp2", "comp3"],
            **extra_kwargs,
        )

    # one continuous and one categorical static covariate per component;
    # the categorical one should lead to 9 one-hot encoded columns
    mixed_covs = pd.DataFrame(
        data={
            "cont": [0.1, 0.2, 0.3],
            "cat": ["a", "b", "c"],
        }
    ).astype(dtype={"cat": "category"})
    continuous_covs = pd.DataFrame(data={"cont": [0.1, 0.2, 0.3]})

    # default transformer_num = MinMaxScaler()
    scaler = StaticCovariatesTransformer(transformer_cat=OneHotEncoder())

    series1 = scaler.fit_transform(build_series(static_covs=mixed_covs))
    series2 = build_series(factor=100, static_covs=continuous_covs)
    series3 = build_series(factor=200)
    series4 = build_series()

    reg_model = RegressionModel(lags=1, output_chunk_length=1)
    all_series = [series1, series2, series3]
    max_samples = 5
    all_series_width = series1.n_components
    max_scovs_width = max(
        s.static_covariates_values(copy=False).reshape(1, -1).shape[1]
        for s in all_series
        if s.has_static_covariates
    )
    expected_width = all_series_width + max_scovs_width

    # no static covs
    features, _ = reg_model._create_lagged_data(
        series3, None, None, max_samples_per_ts=max_samples
    )
    self.assertEqual(features.shape, (5, 3))

    # static covs with different dims
    features, _ = reg_model._create_lagged_data(
        all_series, None, None, max_samples_per_ts=max_samples
    )
    self.assertEqual(
        features.shape,
        (max_samples * len(all_series), expected_width),
    )

    # no static covs at prediction but static covs at training
    reg_model.fit(all_series)
    # simulates features prep at prediction time
    pred_features, _ = reg_model._create_lagged_data(
        series4, None, None, max_samples_per_ts=1
    )
    self.assertEqual(pred_features.shape, (1, expected_width))
eliane-maalouf marked this conversation as resolved.
Show resolved Hide resolved

def test_models_runnability(self):
train_y, test_y = self.sine_univariate1.split_before(0.7)
multi_models_modes = [True, False]
Expand Down
2 changes: 1 addition & 1 deletion darts/utils/data/inference_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def __init__(
use_static_covariates=use_static_covariates,
)

# This dataset is in charge of serving historic and future future covariates
# This dataset is in charge of serving historic and future covariates
self.ds_future = DualCovariatesInferenceDataset(
target_series=target_series,
covariates=future_covariates,
Expand Down