From 0481acdee373a4e48cd5b7c503dd4ede8e127014 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Sat, 3 Dec 2022 15:58:10 +0100 Subject: [PATCH 01/12] - adds support for static covariates with regression models - corrects typo in inference_dataset.py --- darts/models/forecasting/regression_model.py | 58 ++++++++++++++++++++ darts/utils/data/inference_dataset.py | 2 +- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 1d5a8d7d2e..44aeda941a 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -28,6 +28,7 @@ from typing import List, Optional, Sequence, Tuple, Union import numpy as np +import pandas as pd from sklearn.linear_model import LinearRegression from darts.logging import get_logger, raise_if, raise_if_not, raise_log @@ -325,8 +326,59 @@ def _create_lagged_data( multi_models=self.multi_models, ) + training_samples = self._add_static_covariates(target_series, training_samples) + return training_samples, training_labels + def _add_static_covariates(self, series, features): + """Add static covariates to the features. Accounts for series with different static covariates by padding + with 0 to accomodate for the maximum number of available static_covariates in any of the given series in the + sequence. If no static covariates are provided for a given series, its corresponding features are padded with 0. + """ + reps = features.shape[0] // len(series) + # collect static covariates info + map = {"covs_width": [], "values": []} + for ts in series: + if ts.static_covariates is not None: + scovs = ts.static_covariates_values().reshape(1, -1) + map["covs_width"].append(scovs.shape[1]) + map["values"].append(scovs) + else: + map["covs_width"].append(0) + map["values"].append(np.array([])) + + max_width = max(map["covs_width"]) + + if max_width == 0 and self.model.n_features_in_ == features.shape[1]: + # model was not trained with static covariates + # and no static covariates in any of the series in the sequence + return features + elif max_width == 0 and self.model.n_features_in_ != features.shape[1]: + # model was trained with static covariates but is predicting on series without static covariates + pad_zeros = np.zeros((1, self.model.n_features_in_ - features.shape[1])) + features = np.concatenate( + [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 + ) + return features + else: + # at least one series in the sequence has static covariates + static_covs = [] + + # build static covariates array + for i in range(len(series)): + pad_zeros = np.zeros((1, max_width - map["covs_width"][i])) + scovs = ( + np.concatenate((map["values"][i], pad_zeros), axis=1) + if map["covs_width"][i] > 0 + else pad_zeros + ) + static_covs.append(np.tile(scovs, reps=(reps, 1))) + static_covs = np.concatenate(static_covs, axis=0) + + # concatenate static covariates to features + features = np.concatenate([features, static_covs], axis=1) + return features + def _fit_model( self, target_series, @@ -655,6 +707,8 @@ def predict( # concatenate retrieved lags X = np.concatenate(np_X, axis=1) + X = self._add_static_covariates(series, X) + # X has shape (n_series * n_samples, n_regression_features) prediction = self._predict_and_sample(X, num_samples, **kwargs) # prediction shape (n_series * n_samples, output_chunk_length, n_components) @@ -687,6 +741,10 @@ def _predict_and_sample( def __str__(self): return self.model.__str__() + @staticmethod + def _supports_static_covariates() -> bool: + return True + class _LikelihoodMixin: """ diff --git a/darts/utils/data/inference_dataset.py b/darts/utils/data/inference_dataset.py index 8885a2b9e9..1c75dde120 100644 --- a/darts/utils/data/inference_dataset.py +++ b/darts/utils/data/inference_dataset.py @@ -476,7 +476,7 @@ def __init__( use_static_covariates=use_static_covariates, ) - # This dataset is in charge of serving historic and future future covariates + # This dataset is in charge of serving historic and future covariates self.ds_future = DualCovariatesInferenceDataset( target_series=target_series, covariates=future_covariates, From fd1ebaa942f58d15e217bace518f9356bbbc0a3f Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Sun, 4 Dec 2022 10:07:39 +0100 Subject: [PATCH 02/12] - support case where training with series with static covs but predicting on series without static covs. --- darts/models/forecasting/regression_model.py | 28 ++++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 44aeda941a..aa913bd04f 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -30,6 +30,8 @@ import numpy as np import pandas as pd from sklearn.linear_model import LinearRegression +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import GlobalForecastingModel @@ -349,17 +351,21 @@ def _add_static_covariates(self, series, features): max_width = max(map["covs_width"]) - if max_width == 0 and self.model.n_features_in_ == features.shape[1]: - # model was not trained with static covariates - # and no static covariates in any of the series in the sequence - return features - elif max_width == 0 and self.model.n_features_in_ != features.shape[1]: - # model was trained with static covariates but is predicting on series without static covariates - pad_zeros = np.zeros((1, self.model.n_features_in_ - features.shape[1])) - features = np.concatenate( - [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 - ) - return features + if max_width == 0: + if ( + hasattr(self.model, "n_features_in_") + and self.model.n_features_in_ is not None + and self.model.n_features_in_ > features.shape[1] + ): + # for when series in prediction do not have static covariates but some of the training series did + pad_zeros = np.zeros((1, self.model.n_features_in_ - features.shape[1])) + features = np.concatenate( + [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 + ) + return features + else: + return features + else: # at least one series in the sequence has static covariates static_covs = [] From d9c64327406357dc9a99a946374560c68a690a79 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Tue, 6 Dec 2022 22:09:28 +0100 Subject: [PATCH 03/12] - removed unused imports --- darts/models/forecasting/regression_model.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index aa913bd04f..88b0f3a801 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -28,10 +28,7 @@ from typing import List, Optional, Sequence, Tuple, Union import numpy as np -import pandas as pd from sklearn.linear_model import LinearRegression -from sklearn.utils.validation import check_is_fitted -from sklearn.exceptions import NotFittedError from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import GlobalForecastingModel From db994a9f232654df74a762ef35ed9f678a273f42 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Wed, 14 Dec 2022 00:01:05 +0100 Subject: [PATCH 04/12] - reading order --- darts/models/forecasting/regression_model.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 88b0f3a801..43011cf2a3 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -330,16 +330,19 @@ def _create_lagged_data( return training_samples, training_labels def _add_static_covariates(self, series, features): - """Add static covariates to the features. Accounts for series with different static covariates by padding - with 0 to accomodate for the maximum number of available static_covariates in any of the given series in the - sequence. If no static covariates are provided for a given series, its corresponding features are padded with 0. + """ + Add static covariates to the features. Accounts for series with potentially different static covariates + by padding with 0 to accomodate for the maximum number of available static_covariates in any of the given + series in the sequence. If no static covariates are provided for a given series, its corresponding features + are padded with 0. """ reps = features.shape[0] // len(series) # collect static covariates info map = {"covs_width": [], "values": []} for ts in series: if ts.static_covariates is not None: - scovs = ts.static_covariates_values().reshape(1, -1) + # reshape with order="F" to ensure that the covariates are read column wise + scovs = ts.static_covariates_values(copy=False).reshape(1, -1, order = "F") map["covs_width"].append(scovs.shape[1]) map["values"].append(scovs) else: From 577a819692f64a35b12a458a60514a2ba456e51d Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Wed, 14 Dec 2022 00:53:02 +0100 Subject: [PATCH 05/12] - unittest --- darts/models/forecasting/regression_model.py | 6 +- .../forecasting/test_regression_models.py | 77 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 43011cf2a3..cae35e0113 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -336,13 +336,17 @@ def _add_static_covariates(self, series, features): series in the sequence. If no static covariates are provided for a given series, its corresponding features are padded with 0. """ + + series = series2seq(series) reps = features.shape[0] // len(series) # collect static covariates info map = {"covs_width": [], "values": []} for ts in series: if ts.static_covariates is not None: # reshape with order="F" to ensure that the covariates are read column wise - scovs = ts.static_covariates_values(copy=False).reshape(1, -1, order = "F") + scovs = ts.static_covariates_values(copy=False).reshape( + 1, -1, order="F" + ) map["covs_width"].append(scovs.shape[1]) map["values"].append(scovs) else: diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 8ceb577a72..af25cafd00 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -7,6 +7,8 @@ import pandas as pd from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import OneHotEncoder +from darts.dataprocessing.transformers import StaticCovariatesTransformer import darts from darts import TimeSeries @@ -638,6 +640,81 @@ def test_prediction_data_creation(self): [44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0], ) + def test_static_covs_addition(self): + + static_covs1 = pd.DataFrame( + data={ + "cont": [0.1, 0.2, 0.3], + "cat": ["a", "b", "c"], # should lead to 9 one-hot encoded columns + } + ).astype(dtype={"cat": "category"}) + + static_covs2 = pd.DataFrame(data={"cont": [0.1, 0.2, 0.3]}) + + # default transformer_num = MinMaxScaler() + scaler = StaticCovariatesTransformer(transformer_cat=OneHotEncoder()) + ref_series = tg.linear_timeseries(length=10) + series1 = TimeSeries.from_times_and_values( + times=ref_series.time_index, + values=np.concatenate([ref_series.values()] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + static_covariates=static_covs1, + ) + series1 = scaler.fit_transform(series1) + + series2 = TimeSeries.from_times_and_values( + times=ref_series.time_index, + values=np.concatenate([ref_series.values() * 100] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + static_covariates=static_covs2, + ) + + series3 = TimeSeries.from_times_and_values( + times=ref_series.time_index, + values=np.concatenate([ref_series.values() * 200] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + ) + + series4 = TimeSeries.from_times_and_values( + times=ref_series.time_index, + values=np.concatenate([ref_series.values()] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + ) + + reg_model = RegressionModel(lags=1, output_chunk_length=1) + all_series = [series1, series2, series3] + max_samples = 5 + all_series_width = series1.n_components + max_scovs_width = max( + [ + s.static_covariates_values(copy=False).reshape(1, -1).shape[1] + for s in all_series + if s.has_static_covariates + ] + ) + + # no static covs + features = reg_model._create_lagged_data( + series3, None, None, max_samples_per_ts=max_samples + )[0] + self.assertEqual(features.shape, (5, 3)) + + # static covs with different dims + features = reg_model._create_lagged_data( + all_series, None, None, max_samples_per_ts=max_samples + )[0] + self.assertEqual( + features.shape, + (max_samples * len(all_series), all_series_width + max_scovs_width), + ) + + # no static covs at prediction but static covs at training + reg_model.fit(all_series) + pred_features = reg_model._create_lagged_data( + series4, None, None, max_samples_per_ts=1 + )[0] # simulates features prep at prediction time + self.assertEqual(pred_features.shape, (1, all_series_width + max_scovs_width)) + def test_models_runnability(self): train_y, test_y = self.sine_univariate1.split_before(0.7) multi_models_modes = [True, False] From 4a28634de0933541a459ebd811b41eeb19ef2185 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Wed, 14 Dec 2022 00:55:38 +0100 Subject: [PATCH 06/12] - formatting --- darts/tests/models/forecasting/test_regression_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index af25cafd00..ed83132e73 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -712,7 +712,9 @@ def test_static_covs_addition(self): reg_model.fit(all_series) pred_features = reg_model._create_lagged_data( series4, None, None, max_samples_per_ts=1 - )[0] # simulates features prep at prediction time + )[ + 0 + ] # simulates features prep at prediction time self.assertEqual(pred_features.shape, (1, all_series_width + max_scovs_width)) def test_models_runnability(self): From ee36f01c6c3d6bc5a250b5dbe93379fddfde5997 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Wed, 14 Dec 2022 00:57:44 +0100 Subject: [PATCH 07/12] - fix imports --- darts/tests/models/forecasting/test_regression_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index ed83132e73..509c61daf4 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -8,7 +8,6 @@ from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor from sklearn.linear_model import LinearRegression from sklearn.preprocessing import OneHotEncoder -from darts.dataprocessing.transformers import StaticCovariatesTransformer import darts from darts import TimeSeries @@ -16,6 +15,7 @@ FutureCyclicEncoder, PastDatetimeAttributeEncoder, ) +from darts.dataprocessing.transformers import StaticCovariatesTransformer from darts.logging import get_logger from darts.metrics import mae, rmse from darts.models import ( From ad44d30ea4b0272a7f6da12043ad9b1d54efaf9b Mon Sep 17 00:00:00 2001 From: Julien Herzen Date: Fri, 16 Dec 2022 17:00:41 +0100 Subject: [PATCH 08/12] small code simplification --- darts/models/forecasting/regression_model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index cae35e0113..798020387e 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -363,10 +363,9 @@ def _add_static_covariates(self, series, features): ): # for when series in prediction do not have static covariates but some of the training series did pad_zeros = np.zeros((1, self.model.n_features_in_ - features.shape[1])) - features = np.concatenate( + return np.concatenate( [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 ) - return features else: return features @@ -386,8 +385,7 @@ def _add_static_covariates(self, series, features): static_covs = np.concatenate(static_covs, axis=0) # concatenate static covariates to features - features = np.concatenate([features, static_covs], axis=1) - return features + return np.concatenate([features, static_covs], axis=1) def _fit_model( self, From c0d605401ed9288dfcbe09eeb7eff493b4638284 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Fri, 16 Dec 2022 17:56:35 +0100 Subject: [PATCH 09/12] moved _add_static_covariates() to tabularization.py --- darts/models/forecasting/regression_model.py | 64 +------------------- darts/utils/data/tabularization.py | 60 ++++++++++++++++++ 2 files changed, 63 insertions(+), 61 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 798020387e..cb4c3355bc 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -33,7 +33,7 @@ from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.timeseries import TimeSeries -from darts.utils.data.tabularization import _create_lagged_data +from darts.utils.data.tabularization import _create_lagged_data, _add_static_covariates from darts.utils.multioutput import MultiOutputRegressor from darts.utils.utils import _check_quantiles, seq2series, series2seq @@ -325,68 +325,10 @@ def _create_lagged_data( multi_models=self.multi_models, ) - training_samples = self._add_static_covariates(target_series, training_samples) + training_samples = _add_static_covariates(self.model, target_series, training_samples) return training_samples, training_labels - def _add_static_covariates(self, series, features): - """ - Add static covariates to the features. Accounts for series with potentially different static covariates - by padding with 0 to accomodate for the maximum number of available static_covariates in any of the given - series in the sequence. If no static covariates are provided for a given series, its corresponding features - are padded with 0. - """ - - series = series2seq(series) - reps = features.shape[0] // len(series) - # collect static covariates info - map = {"covs_width": [], "values": []} - for ts in series: - if ts.static_covariates is not None: - # reshape with order="F" to ensure that the covariates are read column wise - scovs = ts.static_covariates_values(copy=False).reshape( - 1, -1, order="F" - ) - map["covs_width"].append(scovs.shape[1]) - map["values"].append(scovs) - else: - map["covs_width"].append(0) - map["values"].append(np.array([])) - - max_width = max(map["covs_width"]) - - if max_width == 0: - if ( - hasattr(self.model, "n_features_in_") - and self.model.n_features_in_ is not None - and self.model.n_features_in_ > features.shape[1] - ): - # for when series in prediction do not have static covariates but some of the training series did - pad_zeros = np.zeros((1, self.model.n_features_in_ - features.shape[1])) - return np.concatenate( - [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 - ) - else: - return features - - else: - # at least one series in the sequence has static covariates - static_covs = [] - - # build static covariates array - for i in range(len(series)): - pad_zeros = np.zeros((1, max_width - map["covs_width"][i])) - scovs = ( - np.concatenate((map["values"][i], pad_zeros), axis=1) - if map["covs_width"][i] > 0 - else pad_zeros - ) - static_covs.append(np.tile(scovs, reps=(reps, 1))) - static_covs = np.concatenate(static_covs, axis=0) - - # concatenate static covariates to features - return np.concatenate([features, static_covs], axis=1) - def _fit_model( self, target_series, @@ -715,7 +657,7 @@ def predict( # concatenate retrieved lags X = np.concatenate(np_X, axis=1) - X = self._add_static_covariates(series, X) + X = _add_static_covariates(self.model, series, X) # X has shape (n_series * n_samples, n_regression_features) prediction = self._predict_and_sample(X, num_samples, **kwargs) diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py index a9875bc7a7..6a58d37275 100644 --- a/darts/utils/data/tabularization.py +++ b/darts/utils/data/tabularization.py @@ -5,6 +5,7 @@ from darts.logging import raise_if from darts.timeseries import TimeSeries +from darts.utils.utils import series2seq def _create_lagged_data( @@ -161,3 +162,62 @@ def _create_lagged_data( X = np.concatenate(Xs, axis=0) y = np.concatenate(ys, axis=0) return X, y, Ts + + +def _add_static_covariates(model, series, features): + """ + Add static covariates to the features. Accounts for series with potentially different static covariates + by padding with 0 to accomodate for the maximum number of available static_covariates in any of the given + series in the sequence. If no static covariates are provided for a given series, its corresponding features + are padded with 0. + """ + + series = series2seq(series) + reps = features.shape[0] // len(series) + # collect static covariates info + map = {"covs_width": [], "values": []} + for ts in series: + if ts.static_covariates is not None: + # reshape with order="F" to ensure that the covariates are read column wise + scovs = ts.static_covariates_values(copy=False).reshape( + 1, -1, order="F" + ) + map["covs_width"].append(scovs.shape[1]) + map["values"].append(scovs) + else: + map["covs_width"].append(0) + map["values"].append(np.array([])) + + max_width = max(map["covs_width"]) + + if max_width == 0: + if ( + hasattr(model, "n_features_in_") + and model.n_features_in_ is not None + and model.n_features_in_ > features.shape[1] + ): + # for when series in prediction do not have static covariates but some of the training series did + pad_zeros = np.zeros((1, model.n_features_in_ - features.shape[1])) + return np.concatenate( + [features, np.tile(pad_zeros, reps=(reps, 1))], axis=1 + ) + else: + return features + + else: + # at least one series in the sequence has static covariates + static_covs = [] + + # build static covariates array + for i in range(len(series)): + pad_zeros = np.zeros((1, max_width - map["covs_width"][i])) + scovs = ( + np.concatenate((map["values"][i], pad_zeros), axis=1) + if map["covs_width"][i] > 0 + else pad_zeros + ) + static_covs.append(np.tile(scovs, reps=(reps, 1))) + static_covs = np.concatenate(static_covs, axis=0) + + # concatenate static covariates to features + return np.concatenate([features, static_covs], axis=1) \ No newline at end of file From 0c9c8c9414aa7fa9e07e5028a3d076a623632323 Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Fri, 16 Dec 2022 18:09:40 +0100 Subject: [PATCH 10/12] update docstring about static covariates --- darts/models/forecasting/regression_model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index cb4c3355bc..e43120b4d3 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -1,7 +1,7 @@ """ Regression Model ---------------- -A `RegressionModel` forecasts future values of a target series based on lagged values of +A `RegressionModel` forecasts future values of a target series based on * The target series (past lags only) @@ -9,6 +9,8 @@ * An optional future_covariates series (possibly past and future lags) +* Available static covariates + The regression models are learned in a supervised way, and they can wrap around any "scikit-learn like" regression model acting on tabular data having ``fit()`` and ``predict()`` methods. @@ -21,6 +23,8 @@ The lags can be specified either using an integer - in which case it represents the _number_ of (past or future) lags to take into consideration, or as a list - in which case the lags have to be enumerated (strictly negative values denoting past lags and positive values including 0 denoting future lags). +When static covariates are present, they are appended to the lagged features. When mulitple time series are passed, +if their static covariates do not have the same size, the shorter ones are padded with 0 valued features. """ import math From cd89ead9a1e56b68ed808caef36b33eb0b459cdd Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Fri, 16 Dec 2022 20:04:39 +0100 Subject: [PATCH 11/12] added accuracy test formatting and sorting imports --- darts/models/forecasting/regression_model.py | 6 +- .../forecasting/test_regression_models.py | 59 +++++++++++++++++++ darts/utils/data/tabularization.py | 6 +- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index e43120b4d3..3d334f7a21 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -37,7 +37,7 @@ from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.timeseries import TimeSeries -from darts.utils.data.tabularization import _create_lagged_data, _add_static_covariates +from darts.utils.data.tabularization import _add_static_covariates, _create_lagged_data from darts.utils.multioutput import MultiOutputRegressor from darts.utils.utils import _check_quantiles, seq2series, series2seq @@ -329,7 +329,9 @@ def _create_lagged_data( multi_models=self.multi_models, ) - training_samples = _add_static_covariates(self.model, target_series, training_samples) + training_samples = _add_static_covariates( + self.model, target_series, training_samples + ) return training_samples, training_labels diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 509c61daf4..eed3e1ef42 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -717,6 +717,65 @@ def test_static_covs_addition(self): ] # simulates features prep at prediction time self.assertEqual(pred_features.shape, (1, all_series_width + max_scovs_width)) + def test_static_cov_accuracy(self): + # based on : https://unit8co.github.io/darts/examples/15-static-covariates.html + + # given + period = 20 + sine_series = tg.sine_timeseries( + length=4 * period, + value_frequency=1 / period, + column_name="smooth", + freq="h", + ) + + sine_vals = sine_series.values() + linear_vals = np.expand_dims(np.linspace(1, -1, num=19), -1) + + sine_vals[21:40] = linear_vals + sine_vals[61:80] = linear_vals + irregular_series = TimeSeries.from_times_and_values( + values=sine_vals, times=sine_series.time_index, columns=["irregular"] + ) + + # no static covs + train_series_no_cov = [sine_series, irregular_series] + + # categorical static covs + sine_series_st_cat = sine_series.with_static_covariates( + pd.DataFrame(data={"curve_type": ["smooth"]}) + ) + irregular_series_st_cat = irregular_series.with_static_covariates( + pd.DataFrame(data={"curve_type": ["non_smooth"]}) + ) + train_series_static_cov = [sine_series_st_cat, irregular_series_st_cat] + + scaler = StaticCovariatesTransformer(transformer_cat=OneHotEncoder()) + train_series_static_cov = scaler.fit_transform(train_series_static_cov) + + # when + model_no_static_cov = RandomForest(lags=period // 2, bootstrap=False) + model_no_static_cov.fit(train_series_no_cov) + predict_series_no_cov = [series[:60] for series in train_series_no_cov] + pred_no_static_cov = model_no_static_cov.predict( + n=int(period / 2), series=predict_series_no_cov + ) + + model_static_cov = RandomForest(lags=period // 2, bootstrap=False) + model_static_cov.fit(train_series_static_cov) + predict_series_static_cov = [series[:60] for series in train_series_static_cov] + pred_static_cov = model_static_cov.predict( + n=int(period / 2), series=predict_series_static_cov + ) + + # then + for series, ps_no_st, ps_st_cat in zip( + train_series_static_cov, pred_no_static_cov, pred_static_cov + ): + rmses = [rmse(series, ps) for ps in [ps_no_st, ps_st_cat]] + + self.assertLess(rmses[1], rmses[0]) + def test_models_runnability(self): train_y, test_y = self.sine_univariate1.split_before(0.7) multi_models_modes = [True, False] diff --git a/darts/utils/data/tabularization.py b/darts/utils/data/tabularization.py index 6a58d37275..41ae0e1205 100644 --- a/darts/utils/data/tabularization.py +++ b/darts/utils/data/tabularization.py @@ -179,9 +179,7 @@ def _add_static_covariates(model, series, features): for ts in series: if ts.static_covariates is not None: # reshape with order="F" to ensure that the covariates are read column wise - scovs = ts.static_covariates_values(copy=False).reshape( - 1, -1, order="F" - ) + scovs = ts.static_covariates_values(copy=False).reshape(1, -1, order="F") map["covs_width"].append(scovs.shape[1]) map["values"].append(scovs) else: @@ -220,4 +218,4 @@ def _add_static_covariates(model, series, features): static_covs = np.concatenate(static_covs, axis=0) # concatenate static covariates to features - return np.concatenate([features, static_covs], axis=1) \ No newline at end of file + return np.concatenate([features, static_covs], axis=1) From fb6442151af4a7db19b9209669e30e298208359d Mon Sep 17 00:00:00 2001 From: eliane-maalouf <112691612+eliane-maalouf@users.noreply.github.com> Date: Fri, 16 Dec 2022 20:08:40 +0100 Subject: [PATCH 12/12] typo --- darts/models/forecasting/regression_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 3d334f7a21..f49980eda0 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -23,7 +23,7 @@ The lags can be specified either using an integer - in which case it represents the _number_ of (past or future) lags to take into consideration, or as a list - in which case the lags have to be enumerated (strictly negative values denoting past lags and positive values including 0 denoting future lags). -When static covariates are present, they are appended to the lagged features. When mulitple time series are passed, +When static covariates are present, they are appended to the lagged features. When multiple time series are passed, if their static covariates do not have the same size, the shorter ones are padded with 0 valued features. """