From a104deacfb84e2638bce7419c545b0bd8d777d70 Mon Sep 17 00:00:00 2001 From: Bernie Wang Date: Tue, 11 Feb 2020 14:22:44 -0800 Subject: [PATCH 01/44] more features for MQDNN, and Refactoring, remove of ts-fields from data-entry. --- src/gluonts/block/encoder.py | 23 ++-- .../model/seq2seq/_forking_estimator.py | 14 +- src/gluonts/model/seq2seq/_forking_network.py | 14 +- .../model/seq2seq/_mq_dnn_estimator.py | 95 ++++++++++--- .../model/seq2seq/_seq2seq_estimator.py | 2 +- src/gluonts/model/seq2seq/_transform.py | 130 +++++++++++------- test/model/seq2seq/test_encoders.py | 20 ++- .../seq2seq/test_forking_sequence_splitter.py | 87 ++++++++++-- 8 files changed, 279 insertions(+), 106 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index 5245639b2e..2a3e4ee3a6 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -93,7 +93,7 @@ def _assemble_inputs( target target time series, - shape (batch_size, sequence_length) + shape (batch_size, sequence_length, 1) static_features static features, @@ -111,7 +111,6 @@ def _assemble_inputs( num_static_features + num_dynamic_features + 1) """ - target = target.expand_dims(axis=-1) # (N, T, 1) helper_ones = F.ones_like(target) # Ones of (N, T, 1) tiled_static_features = F.batch_dot( @@ -156,7 +155,8 @@ def __init__( kernel_size_seq: List[int], channels_seq: List[int], use_residual: bool = False, - use_covariates: bool = False, + use_static_feat: bool = False, + use_dynamic_feat: bool = False, **kwargs, ) -> None: assert all( @@ -172,7 +172,8 @@ def __init__( super().__init__(**kwargs) self.use_residual = use_residual - self.use_covariates = use_covariates + self.use_static_feat = use_static_feat + self.use_dynamic_feat = use_dynamic_feat self.cnn = nn.HybridSequential() it = zip(channels_seq, kernel_size_seq, dilation_seq) @@ -203,7 +204,7 @@ def hybrid_forward( target target time series, - shape (batch_size, sequence_length) + shape (batch_size, sequence_length, 1) static_features static features, @@ -224,13 +225,17 @@ def hybrid_forward( shape (batch_size, sequence_length, num_dynamic_features) """ - if self.use_covariates: + if self.use_dynamic_feat and self.use_static_feat: inputs = Seq2SeqEncoder._assemble_inputs( F, target=target, static_features=static_features, dynamic_features=dynamic_features, ) + elif self.use_dynamic_feat: + inputs = F.concat( + target, dynamic_features, dim=2 + ) # (N, T, C) else: inputs = target @@ -302,7 +307,7 @@ def hybrid_forward( target target time series, - shape (batch_size, sequence_length) + shape (batch_size, sequence_length, 1) static_features static features, @@ -442,7 +447,7 @@ def hybrid_forward( target target time series, - shape (batch_size, sequence_length) + shape (batch_size, sequence_length, 1) static_features static features, @@ -473,4 +478,4 @@ def hybrid_forward( F.slice_axis(dynamic_code, axis=1, begin=-1, end=None), axis=1 ) - return static_code, dynamic_code + return static_code, dynamic_code \ No newline at end of file diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 207d4a5126..9f05f7aefd 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -29,12 +29,14 @@ from gluonts.trainer import Trainer from gluonts.transform import ( AsNumpyArray, + AddAgeFeature, Chain, TestSplitSampler, Transformation, ) # Relative imports +# from transform import AddAgeFeature from ._forking_network import ( ForkingSeq2SeqPredictionNetwork, ForkingSeq2SeqTrainingNetwork, @@ -115,11 +117,21 @@ def __init__( def create_transformation(self) -> Transformation: return Chain( trans=[ - AsNumpyArray(field=FieldName.TARGET, expected_ndim=1), + AsNumpyArray( + field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype + ), + AddAgeFeature( + target_field=FieldName.TARGET, + output_field=FieldName.FEAT_DYNAMIC_REAL, + log_scale=True, + pred_length=self.prediction_length, + dtype=self.dtype, + ), ForkingSequenceSplitter( train_sampler=TestSplitSampler(), enc_len=self.context_length, dec_len=self.prediction_length, + encoder_series_fields=[FieldName.FEAT_DYNAMIC_REAL], ), ] ) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 478c720435..359bc5a9ea 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -67,7 +67,7 @@ def __init__( class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( - self, F, past_target: Tensor, future_target: Tensor + self, F, past_target: Tensor, past_feat_dynamic_real: Tensor, future_target: Tensor ) -> Tensor: """ Parameters @@ -84,9 +84,13 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ + # print(f"past target: {past_target.shape}") + # print(f"past_feat_dynamic_real: {past_feat_dynamic_real.shape}") + # print(f"future_target: {future_target.shape}") + # FIXME: can we factor out a common prefix in the base network? feat_static_real = nd_None - past_feat_dynamic_real = nd_None + # past_feat_dynamic_real = nd_None future_feat_dynamic_real = nd_None enc_output_static, enc_output_dynamic = self.encoder( @@ -100,13 +104,15 @@ def hybrid_forward( dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) + # print(f"decoder output: {dec_dist_output.shape}") + loss = self.loss(future_target, dec_dist_output) return loss.mean(axis=1) class ForkingSeq2SeqPredictionNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding - def hybrid_forward(self, F, past_target: Tensor) -> Tensor: + def hybrid_forward(self, F, past_target: Tensor, past_feat_dynamic_real: Tensor) -> Tensor: """ Parameters ---------- @@ -122,7 +128,7 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: # FIXME: can we factor out a common prefix in the base network? feat_static_real = nd_None - past_feat_dynamic_real = nd_None + # past_feat_dynamic_real = nd_None future_feat_dynamic_real = nd_None enc_output_static, enc_output_dynamic = self.encoder( diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index d7a8854258..c89854dca1 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -15,6 +15,7 @@ from typing import List, Optional # First-party imports +from gluonts.evaluation.backtest import make_evaluation_predictions from gluonts.block.decoder import ForkingMLPDecoder from gluonts.block.encoder import ( HierarchicalCausalConv1DEncoder, @@ -26,7 +27,10 @@ from gluonts.trainer import Trainer # Relative imports -from ._forking_estimator import ForkingSeq2SeqEstimator +from gluonts.model.seq2seq._forking_estimator import ForkingSeq2SeqEstimator +from gluonts.evaluation import Evaluator +import numpy as np +import mxnet as mx class MQDNNEstimator(ForkingSeq2SeqEstimator): @@ -44,9 +48,7 @@ def __init__( context_length: Optional[int], prediction_length: int, freq: str, - # FIXME: why do we have two parameters here? - mlp_final_dim: int = 20, - mlp_hidden_dimension_seq: List[int] = list(), + decoder_mlp_dim_seq: List[int] = [20], quantiles: List[float] = list(), trainer: Trainer = Trainer(), ) -> None: @@ -54,13 +56,13 @@ def __init__( prediction_length if context_length is None else context_length ) assert all( - [d > 0 for d in mlp_hidden_dimension_seq] + [d > 0 for d in decoder_mlp_dim_seq] ), "Elements of `mlp_hidden_dimension_seq` should be > 0" decoder = ForkingMLPDecoder( dec_len=prediction_length, - final_dim=mlp_final_dim, - hidden_dimension_sequence=mlp_hidden_dimension_seq, + final_dim=decoder_mlp_dim_seq[-1], + hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], prefix="decoder_", ) @@ -89,25 +91,40 @@ def __init__( prediction_length: int, freq: str, context_length: Optional[int] = None, - # FIXME: prefix those so clients know that these are decoder params - mlp_final_dim: int = 20, - mlp_hidden_dimension_seq: List[int] = list(), + seed: Optional[int] = None, + decoder_mlp_dim_seq: List[int] = [20], + channels_seq: List[int] = [30, 30, 30], + dilation_seq: List[int] = [1, 3, 9], + kernel_size_seq: List[int] = [3, 3, 3], + use_residual: bool = True, quantiles: List[float] = list( [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] ), trainer: Trainer = Trainer(), ) -> None: + + if seed: + np.random.seed(seed) + mx.random.seed(seed) + + assert ( + len(channels_seq) == len(dilation_seq) == len(kernel_size_seq) + ), ( + f"mismatch CNN configurations: {len(channels_seq)} vs. " + f"{len(dilation_seq)} vs. {len(kernel_size_seq)}" + ) + encoder = HierarchicalCausalConv1DEncoder( - dilation_seq=[1, 3, 9], - kernel_size_seq=([3] * len([30, 30, 30])), - channels_seq=[30, 30, 30], - use_residual=True, + dilation_seq=dilation_seq, + kernel_size_seq=channels_seq, + channels_seq=kernel_size_seq, + use_residual=use_residual, + use_dynamic_feat=True, prefix="encoder_", ) super(MQCNNEstimator, self).__init__( encoder=encoder, - mlp_final_dim=mlp_final_dim, - mlp_hidden_dimension_seq=mlp_hidden_dimension_seq, + decoder_mlp_dim_seq=decoder_mlp_dim_seq, freq=freq, prediction_length=prediction_length, trainer=trainer, @@ -128,9 +145,7 @@ def __init__( prediction_length: int, freq: str, context_length: Optional[int] = None, - # FIXME: prefix those so clients know that these are decoder params - mlp_final_dim: int = 20, - mlp_hidden_dimension_seq: List[int] = list(), + decoder_mlp_dim_seq: List[int] = [20], trainer: Trainer = Trainer(), quantiles: List[float] = list([0.1, 0.5, 0.9]), ) -> None: @@ -143,11 +158,49 @@ def __init__( ) super(MQRNNEstimator, self).__init__( encoder=encoder, - mlp_final_dim=mlp_final_dim, - mlp_hidden_dimension_seq=mlp_hidden_dimension_seq, + decoder_mlp_dim_seq=decoder_mlp_dim_seq, freq=freq, prediction_length=prediction_length, trainer=trainer, context_length=context_length, quantiles=quantiles, ) + + +if __name__ == "__main__": + from gluonts.dataset.repository.datasets import ( + get_dataset, + dataset_recipes, + ) + + print(f"datasets available: {dataset_recipes.keys()}") + + # we pick m4_hourly as it only contains a few hundred time series + dataset = get_dataset("m4_hourly", regenerate=False) + + metrics = [] + + for _ in range(1): + estimator = MQCNNEstimator( + prediction_length=dataset.metadata.prediction_length, + seed=42, + freq=dataset.metadata.freq, + quantiles=[0.5], + trainer=Trainer( + epochs=1, num_batches_per_epoch=10, hybridize=True + ), + ) + + predictor = estimator.train(dataset.train) + + forecast_it, ts_it = make_evaluation_predictions( + dataset.test, predictor=predictor, num_samples=100 + ) + + agg_metrics, item_metrics = Evaluator()( + ts_it, forecast_it, num_series=len(dataset.test) + ) + + metrics.append(agg_metrics["wQuantileLoss[0.5]"]) + + print(metrics) diff --git a/src/gluonts/model/seq2seq/_seq2seq_estimator.py b/src/gluonts/model/seq2seq/_seq2seq_estimator.py index c3a85e9b2d..fd1b0ba90d 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_estimator.py +++ b/src/gluonts/model/seq2seq/_seq2seq_estimator.py @@ -278,7 +278,7 @@ def __init__( kernel_size_seq=([3] * len([30, 30, 30])), channels_seq=[30, 30, 30], use_residual=True, - use_covariates=True, + use_dynamic_feat=True, ) super(CNN2QRForecaster, self).__init__( diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 06ce4ac918..8a147b0670 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -12,6 +12,7 @@ # permissions and limitations under the License. # Standard library imports +from collections import Counter from typing import Iterator, List # Third-party imports @@ -23,6 +24,16 @@ from gluonts.transform import FlatMapTransformation, shift_timestamp +def pad_to_size(xs, size): + """Pads `xs` with 0 on the left on the last axis.""" + pad_length = size - xs.shape[-1] + if pad_length <= 0: + return xs + + pad_width = ([(0, 0)] * (xs.ndim - 1)) + [(pad_length, 0)] + return np.pad(xs, pad_width) + + class ForkingSequenceSplitter(FlatMapTransformation): """Forking sequence splitter.""" @@ -32,12 +43,14 @@ def __init__( train_sampler, enc_len: int, dec_len: int, - time_series_fields: List[str] = None, - target_in="target", + target_in: str = "target", + encoder_series_fields: List[str] = None, + decoder_series_fields: List[str] = [], is_pad_out: str = "is_pad", - start_in: str = "start", - forecast_start_out: str = "forecast_start", + start_input_field: str = "start", + forecast_start_output_field: str = "forecast_start", ) -> None: + assert enc_len > 0, "The value of `enc_len` should be > 0" assert dec_len > 0, "The value of `dec_len` should be > 0" @@ -45,12 +58,13 @@ def __init__( self.enc_len = enc_len self.dec_len = dec_len self.ts_fields = ( - time_series_fields if time_series_fields is not None else [] + encoder_series_fields if encoder_series_fields is not None else [] ) self.target_in = target_in self.is_pad_out = is_pad_out - self.start_in = start_in - self.forecast_start_out = forecast_start_out + self.start_in = start_input_field + self.forecast_start_out = forecast_start_output_field + self.decoder_series_fields = decoder_series_fields def _past(self, col_name): return f"past_{col_name}" @@ -61,63 +75,77 @@ def _future(self, col_name): def flatmap_transform( self, data: DataEntry, is_train: bool ) -> Iterator[DataEntry]: - dec_len = self.dec_len - slice_cols = self.ts_fields + [self.target_in] target = data[self.target_in] if is_train: + # We currently cannot handle time series that are shorter than the + # prediction length during training, so we just skip these. + # If we want to include them we would need to pad and to mask + # the loss. if len(target) < self.dec_len: - # We currently cannot handle time series that are shorter than the - # prediction length during training, so we just skip these. - # If we want to include them we would need to pad and to mask - # the loss. - sampling_indices: List[int] = [] - else: - sampling_indices = self.train_sampler( - target, 0, len(target) - self.dec_len - ) + return + + sampling_indices = self.train_sampler( + target, 0, len(target) - self.dec_len + ) else: sampling_indices = [len(target)] - for i in sampling_indices: - pad_length = max(self.enc_len - i, 0) - - d = data.copy() - for ts_field in slice_cols: - if i > self.enc_len: - # truncate to past_length - past_piece = d[ts_field][..., i - self.enc_len : i] - elif i < self.enc_len: - pad_block = np.zeros( - d[ts_field].shape[:-1] + (pad_length,) - ) - past_piece = np.concatenate( - [pad_block, d[ts_field][..., :i]], axis=-1 - ) + decoder_fields = set([self.target_in] + self.decoder_series_fields) + + ts_fields_counter = Counter( + self.ts_fields + [self.target_in] + self.decoder_series_fields + ) + + for sampling_idx in sampling_indices: + # ensure start index is not negative + start_idx = max(0, sampling_idx - self.enc_len) + + out = data.copy() + + for ts_field in list(ts_fields_counter.keys()): + + # target is 1d, this ensures ts is always 2d + ts = np.atleast_2d(out[ts_field]) + + if ts_fields_counter[ts_field] == 1: + del out[ts_field] else: - past_piece = d[ts_field][..., :i] + ts_fields_counter[ts_field] -= 1 + + # take enc_len values from ts, depending on sampling_idx + slice = ts[:, start_idx:sampling_idx] - d[self._past(ts_field)] = np.expand_dims(past_piece, -1) + # if we have less than enc_len values, pad_left with 0 + past_piece = pad_to_size(slice, self.enc_len) - if is_train and ts_field is self.target_in: + out[f"past_{ts_field}"] = past_piece.transpose() + + # in prediction mode, don't provide decode-values + if not is_train and ts_field == self.target_in: + continue + + if ts_field in decoder_fields: + d3 = () if ts_field == self.target_in else (len(ts),) forking_dec_field = np.zeros( - shape=(self.enc_len, self.dec_len) + shape=(self.enc_len, self.dec_len) + d3 ) - for j in range(self.enc_len): - start_idx = i - self.enc_len + j + 1 - if start_idx >= 0: - forking_dec_field[j, :] = d[ts_field][ - ..., start_idx : start_idx + dec_len - ] - - d[self._future(ts_field)] = forking_dec_field + skip = max(0, self.enc_len - 1 - sampling_idx) + for dec_field, idx in zip( + forking_dec_field[skip:], range(start_idx) + ): + dec_field[:] = ts[:, idx : idx + self.dec_len] - del d[ts_field] + out[self._future(ts_field)] = forking_dec_field pad_indicator = np.zeros(self.enc_len) - if pad_length > 0: - pad_indicator[:pad_length] = 1 - d[self._past(self.is_pad_out)] = pad_indicator - d[self.forecast_start_out] = shift_timestamp(d[self.start_in], i) - yield d + pad_length = max(0, self.enc_len - sampling_idx) + pad_indicator[:pad_length] = True + out[f"past_{self.is_pad_out}"] = pad_indicator + + out[self.forecast_start_out] = shift_timestamp( + out[self.start_in], sampling_idx + ) + + yield out diff --git a/test/model/seq2seq/test_encoders.py b/test/model/seq2seq/test_encoders.py index 7b8e594119..96063864de 100644 --- a/test/model/seq2seq/test_encoders.py +++ b/test/model/seq2seq/test_encoders.py @@ -21,20 +21,30 @@ nd_None = nd.array([]) -@pytest.mark.skip() -def test_hierarchical_cnn_encoders() -> None: +@pytest.mark.parametrize("use_residual", [True, False]) +@pytest.mark.parametrize("hybridize", [True, False]) +def test_hierarchical_cnn_encoders(use_residual, hybridize) -> None: num_ts = 2 ts_len = 10 + num_static_feat = 2 + num_dynamic_feat = 5 + test_data = nd.arange(num_ts * ts_len).reshape(shape=(num_ts, ts_len, 1)) + test_static_feat = nd.random.randn(num_ts, num_static_feat) + test_dynamic_feat = nd.random.randn(num_ts, ts_len, num_dynamic_feat) chl_dim = [30, 30, 30] ks_seq = [3] * len(chl_dim) dial_seq = [1, 3, 9] cnn = HierarchicalCausalConv1DEncoder( - dial_seq, ks_seq, chl_dim, use_residual=True + dial_seq, ks_seq, chl_dim, use_residual, use_dynamic_feat=True ) cnn.collect_params().initialize() - cnn.hybridize() - print(cnn(test_data, nd_None, nd_None)[1].shape) + if hybridize: + cnn.hybridize() + + true_shape = (num_ts, ts_len, 31) if use_residual else (num_ts, ts_len, 30) + + assert cnn(test_data, test_static_feat, test_dynamic_feat)[1].shape == true_shape \ No newline at end of file diff --git a/test/model/seq2seq/test_forking_sequence_splitter.py b/test/model/seq2seq/test_forking_sequence_splitter.py index f4bc9f37ec..2798074bdd 100644 --- a/test/model/seq2seq/test_forking_sequence_splitter.py +++ b/test/model/seq2seq/test_forking_sequence_splitter.py @@ -15,6 +15,8 @@ import numpy as np # First-party imports +import pytest + from gluonts import transform from gluonts.dataset.common import ListDataset from gluonts.dataset.field_names import FieldName @@ -22,23 +24,22 @@ # if we import TestSplitSampler as Test... pytest thinks it's a test from gluonts.transform import TestSplitSampler as TSplitSampler +from gluonts.time_feature import time_features_from_frequency_str -def test_forking_sequence_splitter() -> None: - def make_dataset(N, train_length): - # generates 2 ** N - 1 timeseries with constant increasing values - n = 2 ** N - 1 +def make_dataset(N, train_length): + # generates 2 ** N - 1 timeseries with constant increasing values + n = 2 ** N - 1 - targets = np.arange(n * train_length).reshape((n, train_length)) + targets = np.arange(n * train_length).reshape((n, train_length)) - return ListDataset( - [ - {"start": "2012-01-01", "target": targets[i, :]} - for i in range(n) - ], - freq="D", - ) + return ListDataset( + [{"start": "2012-01-01", "target": targets[i, :]} for i in range(n)], + freq="D", + ) + +def test_forking_sequence_splitter() -> None: ds = make_dataset(1, 20) trans = transform.Chain( @@ -50,9 +51,9 @@ def make_dataset(N, train_length): ), ForkingSequenceSplitter( train_sampler=TSplitSampler(), - time_series_fields=["age"], enc_len=5, dec_len=3, + encoder_series_fields=["age"], ), ] ) @@ -84,9 +85,9 @@ def make_dataset(N, train_length): ), ForkingSequenceSplitter( train_sampler=TSplitSampler(), - time_series_fields=["age"], enc_len=20, dec_len=20, + encoder_series_fields=["age"], ), ] ) @@ -97,3 +98,61 @@ def make_dataset(N, train_length): np.sum(transformed_data_oob["future_target"]) - np.sum(np.arange(20)) < 1e-5 ), "the forking sequence target should be computed correctly." + + +@pytest.mark.parametrize("is_train", [True, False]) +def test_forking_sequence_with_features(is_train) -> None: + def make_dataset(N, train_length): + # generates 2 ** N - 1 timeseries with constant increasing values + n = 2 ** N - 1 + + targets = np.arange(n * train_length).reshape((n, train_length)) + + return ListDataset( + [ + {"start": "2012-01-01", "target": targets[i, :]} + for i in range(n) + ], + freq="D", + ) + + ds = make_dataset(1, 20) + + trans = transform.Chain( + trans=[ + transform.AddAgeFeature( + target_field=FieldName.TARGET, + output_field=FieldName.FEAT_AGE, + pred_length=10, + ), + transform.AddTimeFeatures( + start_field=FieldName.START, + target_field=FieldName.TARGET, + output_field=FieldName.FEAT_TIME, + time_features=time_features_from_frequency_str("D"), + pred_length=10, + ), + ForkingSequenceSplitter( + train_sampler=TSplitSampler(), + enc_len=5, + dec_len=3, + target_in=FieldName.TARGET, + encoder_series_fields=[ + FieldName.FEAT_AGE, + FieldName.FEAT_TIME, + ], + decoder_series_fields=[FieldName.FEAT_TIME], + ), + ] + ) + + out = trans(iter(ds), is_train=is_train) + transformed_data = next(iter(out)) + + assert transformed_data["past_target"].shape == (5, 1) + assert transformed_data["past_feat_dynamic_age"].shape == (5, 1) + assert transformed_data["past_time_feat"].shape == (5, 3) + assert transformed_data["future_time_feat"].shape == (5, 3, 3) + + if is_train: + assert transformed_data["future_target"].shape == (5, 3) From 56b150bb0cc171c0176c74078f4953d08b86927e Mon Sep 17 00:00:00 2001 From: Bernie Wang Date: Mon, 17 Feb 2020 17:21:00 -0800 Subject: [PATCH 02/44] fix the future target calculation --- src/gluonts/model/seq2seq/_transform.py | 3 +- .../seq2seq/test_forking_sequence_splitter.py | 38 +++++++------------ 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 8a147b0670..dc0ff84664 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -133,7 +133,8 @@ def flatmap_transform( skip = max(0, self.enc_len - 1 - sampling_idx) for dec_field, idx in zip( - forking_dec_field[skip:], range(start_idx) + forking_dec_field[skip:], + range(start_idx + 1, start_idx + self.enc_len + 1), ): dec_field[:] = ts[:, idx : idx + self.dec_len] diff --git a/test/model/seq2seq/test_forking_sequence_splitter.py b/test/model/seq2seq/test_forking_sequence_splitter.py index 2798074bdd..2087f86551 100644 --- a/test/model/seq2seq/test_forking_sequence_splitter.py +++ b/test/model/seq2seq/test_forking_sequence_splitter.py @@ -40,19 +40,22 @@ def make_dataset(N, train_length): def test_forking_sequence_splitter() -> None: - ds = make_dataset(1, 20) + len_ts = 20 + ds = make_dataset(1, len_ts) + enc_len = 5 + dec_len = 3 trans = transform.Chain( trans=[ transform.AddAgeFeature( target_field=FieldName.TARGET, output_field="age", - pred_length=10, + pred_length=dec_len, ), ForkingSequenceSplitter( train_sampler=TSplitSampler(), - enc_len=5, - dec_len=3, + enc_len=enc_len, + dec_len=dec_len, encoder_series_fields=["age"], ), ] @@ -70,34 +73,19 @@ def test_forking_sequence_splitter() -> None: [17.0, 18.0, 19.0], ] ) - assert ( np.linalg.norm(future_target - transformed_data["future_target"]) < 1e-5 ), "the forking sequence target should be computed correctly." - trans_oob = transform.Chain( - trans=[ - transform.AddAgeFeature( - target_field=FieldName.TARGET, - output_field="age", - pred_length=10, - ), - ForkingSequenceSplitter( - train_sampler=TSplitSampler(), - enc_len=20, - dec_len=20, - encoder_series_fields=["age"], - ), - ] - ) - - transformed_data_oob = next(iter(trans_oob(iter(ds), is_train=True))) - + age = np.log10(2.0 + np.arange(len_ts)) assert ( - np.sum(transformed_data_oob["future_target"]) - np.sum(np.arange(20)) + np.linalg.norm( + age[-(enc_len + dec_len) : -dec_len] + - transformed_data["past_age"].flatten() + ) < 1e-5 - ), "the forking sequence target should be computed correctly." + ), "the forking sequence past feature should be computed correctly." @pytest.mark.parametrize("is_train", [True, False]) From 356efaac4a1826e20571add5efc50e3f97fa56b0 Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Tue, 25 Feb 2020 15:18:26 +0100 Subject: [PATCH 03/44] Added derive_auto_fields method. --- src/gluonts/model/deepar/_estimator.py | 15 +++++++++++++-- src/gluonts/model/estimator.py | 9 +++++++++ src/gluonts/model/predictor.py | 9 +++++++++ src/gluonts/shell/train.py | 12 ++++++------ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/gluonts/model/deepar/_estimator.py b/src/gluonts/model/deepar/_estimator.py index 7005f49c85..a593eb76bb 100644 --- a/src/gluonts/model/deepar/_estimator.py +++ b/src/gluonts/model/deepar/_estimator.py @@ -21,6 +21,7 @@ # First-party imports from gluonts.core.component import DType, validated from gluonts.dataset.field_names import FieldName +from gluonts.dataset.stat import calculate_dataset_statistics from gluonts.distribution import DistributionOutput, StudentTOutput from gluonts.model.estimator import GluonEstimator from gluonts.model.predictor import Predictor, RepresentableBlockPredictor @@ -146,8 +147,8 @@ def __init__( assert num_layers > 0, "The value of `num_layers` should be > 0" assert num_cells > 0, "The value of `num_cells` should be > 0" assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0" - assert (cardinality is not None and use_feat_static_cat) or ( - cardinality is None and not use_feat_static_cat + assert (cardinality and use_feat_static_cat) or ( + not (cardinality or use_feat_static_cat) ), "You should set `cardinality` if and only if `use_feat_static_cat=True`" assert cardinality is None or all( [c > 0 for c in cardinality] @@ -197,6 +198,16 @@ def __init__( self.num_parallel_samples = num_parallel_samples + @classmethod + def derive_auto_fields(cls, train_iter): + stats = calculate_dataset_statistics(train_iter) + + return { + "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, + "use_feat_static_cat": bool(stats.feat_static_cat), + "cardinality": [len(cats) for cats in stats.feat_static_cat], + } + def create_transformation(self) -> Transformation: remove_field_names = [FieldName.FEAT_DYNAMIC_CAT] if not self.use_feat_static_real: diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index 55e6409b2c..e18d801763 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -69,6 +69,15 @@ def train( def from_hyperparameters(cls, **hyperparameters): return from_hyperparameters(cls, **hyperparameters) + @classmethod + def derive_auto_fields(cls, train_iter): + return {} + + @classmethod + def from_inputs(cls, train_iter, params): + auto_params = cls.derive_auto_fields(train_iter) + return cls.from_hyperparameters(**auto_params, **params) + class DummyEstimator(Estimator): """ diff --git a/src/gluonts/model/predictor.py b/src/gluonts/model/predictor.py index bfbcd83fa6..61263e848f 100644 --- a/src/gluonts/model/predictor.py +++ b/src/gluonts/model/predictor.py @@ -156,6 +156,15 @@ def deserialize( def from_hyperparameters(cls, **hyperparameters): return from_hyperparameters(cls, **hyperparameters) + @classmethod + def derive_auto_fields(cls, train_iter): + return {} + + @classmethod + def from_inputs(cls, train_iter, params): + auto_params = cls.derive_auto_fields(train_iter) + return cls.from_hyperparameters(**auto_params, **params) + class RepresentablePredictor(Predictor): """ diff --git a/src/gluonts/shell/train.py b/src/gluonts/shell/train.py index f01f065069..e561c15ba8 100644 --- a/src/gluonts/shell/train.py +++ b/src/gluonts/shell/train.py @@ -22,7 +22,6 @@ from gluonts.core.serde import dump_code from gluonts.evaluation import Evaluator, backtest from gluonts.dataset.common import Dataset -from gluonts.dataset.stat import calculate_dataset_statistics from gluonts.model.estimator import Estimator from gluonts.model.predictor import Predictor from gluonts.transform import FilterTransformation, TransformedDataset @@ -54,13 +53,18 @@ def run_train_and_test( ) -> None: check_gpu_support() + # train_stats = calculate_dataset_statistics(env.datasets["train"]) + # log_metric("train_dataset_stats", train_stats) + forecaster_fq_name = fqname_for(forecaster_type) forecaster_version = forecaster_type.__version__ logger.info(f"Using gluonts v{gluonts.__version__}") logger.info(f"Using forecaster {forecaster_fq_name} v{forecaster_version}") - forecaster = forecaster_type.from_hyperparameters(**env.hyperparameters) + forecaster = forecaster_type.from_inputs( + env.datasets["train"], env.hyperparameters + ) logger.info( f"The forecaster can be reconstructed with the following expression: " @@ -90,10 +94,6 @@ def run_train( train_dataset: Dataset, validation_dataset: Optional[Dataset], ) -> Predictor: - log_metric( - "train_dataset_stats", calculate_dataset_statistics(train_dataset) - ) - return forecaster.train(train_dataset, validation_dataset) From eefce491591ca82159f09997a0547dd933d872ad Mon Sep 17 00:00:00 2001 From: Bernie Wang Date: Tue, 25 Feb 2020 16:28:41 +0100 Subject: [PATCH 04/44] add use_dynamic_feat option --- .../model/seq2seq/_forking_estimator.py | 65 +++++-- src/gluonts/model/seq2seq/_forking_network.py | 160 ++++++++++++++++-- .../model/seq2seq/_mq_dnn_estimator.py | 8 +- 3 files changed, 205 insertions(+), 28 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 9f05f7aefd..1db875730c 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -30,17 +30,19 @@ from gluonts.transform import ( AsNumpyArray, AddAgeFeature, + AddTimeFeatures, Chain, TestSplitSampler, Transformation, ) # Relative imports -# from transform import AddAgeFeature +from gluonts.time_feature import time_features_from_frequency_str from ._forking_network import ( ForkingSeq2SeqPredictionNetwork, ForkingSeq2SeqTrainingNetwork, -) + ForkingSeq2SeqNetwork, ForkingSeq2SeqNetworkBase, ForkingSeq2SeqTargetPredictionNetwork, + ForkingSeq2SeqTargetTrainingNetwork) from ._transform import ForkingSequenceSplitter @@ -93,6 +95,7 @@ def __init__( quantile_output: QuantileOutput, freq: str, prediction_length: int, + use_dynamic_feat: bool = False, context_length: Optional[int] = None, trainer: Trainer = Trainer(), ) -> None: @@ -110,22 +113,32 @@ def __init__( self.quantile_output = quantile_output self.prediction_length = prediction_length self.freq = freq + self.use_dynamic_feat = use_dynamic_feat self.context_length = ( context_length if context_length is not None else prediction_length ) def create_transformation(self) -> Transformation: - return Chain( + + if self.use_dynamic_feat: + feat_def = Chain( trans=[ AsNumpyArray( field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype ), - AddAgeFeature( + # AddAgeFeature( + # target_field=FieldName.TARGET, + # output_field=FieldName.FEAT_DYNAMIC_REAL, + # log_scale=True, + # pred_length=self.prediction_length, + # dtype=self.dtype, + # ), + AddTimeFeatures( + start_field=FieldName.START, target_field=FieldName.TARGET, output_field=FieldName.FEAT_DYNAMIC_REAL, - log_scale=True, + time_features= time_features_from_frequency_str(self.freq), pred_length=self.prediction_length, - dtype=self.dtype, ), ForkingSequenceSplitter( train_sampler=TestSplitSampler(), @@ -133,21 +146,43 @@ def create_transformation(self) -> Transformation: dec_len=self.prediction_length, encoder_series_fields=[FieldName.FEAT_DYNAMIC_REAL], ), - ] - ) + ]) + else: + feat_def = Chain( + trans=[ + AsNumpyArray( + field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype + ), + ForkingSequenceSplitter( + train_sampler=TestSplitSampler(), + enc_len=self.context_length, + dec_len=self.prediction_length, + ), + ]) + + return feat_def + + def create_training_network(self) -> ForkingSeq2SeqNetworkBase: + # return ForkingSeq2SeqTrainingNetwork( + # encoder=self.encoder, + # enc2dec=PassThroughEnc2Dec(), + # decoder=self.decoder, + # quantile_output=self.quantile_output, + # ) - def create_training_network(self) -> ForkingSeq2SeqTrainingNetwork: - return ForkingSeq2SeqTrainingNetwork( + return ForkingSeq2SeqNetwork( encoder=self.encoder, enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - ) + use_dynamic_real=self.use_dynamic_feat + ).get_training_network() + def create_predictor( self, transformation: Transformation, - trained_network: ForkingSeq2SeqTrainingNetwork, + trained_network: ForkingSeq2SeqNetworkBase, ) -> Predictor: # todo: this is specific to quantile output quantile_strs = [ @@ -155,12 +190,14 @@ def create_predictor( for quantile in self.quantile_output.quantiles ] - prediction_network = ForkingSeq2SeqPredictionNetwork( + + prediction_network = ForkingSeq2SeqNetwork( encoder=trained_network.encoder, enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - ) + use_dynamic_real=self.use_dynamic_feat + ).get_prediction_network() copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 359bc5a9ea..c805f04030 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -59,15 +59,79 @@ def __init__( self.decoder = decoder self.quantile_output = quantile_output + self.feat_static_real = nd_None + self.past_feat_dynamic_real = nd_None + self.future_feat_dynamic_real = nd_None + with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() +class ForkingSeq2SeqNetwork(): + @validated() + def __init__( + self, + encoder: Seq2SeqEncoder, + enc2dec: Seq2SeqEnc2Dec, + decoder: Seq2SeqDecoder, + quantile_output: QuantileOutput, + use_dynamic_real: bool = False, + use_static_cat: bool = False, + **kwargs, + ) -> None: + self.encoder = encoder + self.enc2dec = enc2dec + self.decoder = decoder + self.quantile_output = quantile_output + + self.use_dynamic_real = use_dynamic_real + self.use_static_cat = use_static_cat + + def get_training_network(self) -> ForkingSeq2SeqNetworkBase: + if self.use_static_cat is False and self.use_dynamic_real is False: + return ForkingSeq2SeqTargetTrainingNetwork( + encoder=self.encoder, + enc2dec=self.enc2dec, + decoder=self.decoder, + quantile_output=self.quantile_output + ) + elif self.use_static_cat is False and self.use_dynamic_real: + return ForkingSeq2SeqTrainingNetwork( + encoder=self.encoder, + enc2dec=self.enc2dec, + decoder=self.decoder, + quantile_output=self.quantile_output + ) + else: + raise("Not implemented yet!") + + def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: + if self.use_static_cat is False and self.use_dynamic_real is False: + return ForkingSeq2SeqTargetPredictionNetwork( + encoder=self.encoder, + enc2dec=self.enc2dec, + decoder=self.decoder, + quantile_output=self.quantile_output + ) + elif self.use_static_cat is False and self.use_dynamic_real: + return ForkingSeq2SeqPredictionNetwork( + encoder=self.encoder, + enc2dec=self.enc2dec, + decoder=self.decoder, + quantile_output=self.quantile_output + ) + else: + raise("Not implemented yet!") + + class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( - self, F, past_target: Tensor, past_feat_dynamic_real: Tensor, future_target: Tensor + self, F, + past_target: Tensor, + past_feat_dynamic_real: Tensor, + future_target: Tensor ) -> Tensor: """ Parameters @@ -88,17 +152,13 @@ def hybrid_forward( # print(f"past_feat_dynamic_real: {past_feat_dynamic_real.shape}") # print(f"future_target: {future_target.shape}") - # FIXME: can we factor out a common prefix in the base network? - feat_static_real = nd_None - # past_feat_dynamic_real = nd_None - future_feat_dynamic_real = nd_None enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -112,7 +172,10 @@ def hybrid_forward( class ForkingSeq2SeqPredictionNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding - def hybrid_forward(self, F, past_target: Tensor, past_feat_dynamic_real: Tensor) -> Tensor: + def hybrid_forward(self, + F, + past_target: Tensor, + past_feat_dynamic_real: Tensor) -> Tensor: """ Parameters ---------- @@ -127,12 +190,9 @@ def hybrid_forward(self, F, past_target: Tensor, past_feat_dynamic_real: Tensor) """ # FIXME: can we factor out a common prefix in the base network? - feat_static_real = nd_None - # past_feat_dynamic_real = nd_None - future_feat_dynamic_real = nd_None enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, past_feat_dynamic_real ) enc_output_static = ( @@ -140,7 +200,7 @@ def hybrid_forward(self, F, past_target: Tensor, past_feat_dynamic_real: Tensor) ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -149,3 +209,77 @@ def hybrid_forward(self, F, past_target: Tensor, past_feat_dynamic_real: Tensor) predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) return predictions + + +class ForkingSeq2SeqTargetTrainingNetwork(ForkingSeq2SeqNetworkBase): + # noinspection PyMethodOverriding + def hybrid_forward( + self, F, past_target: Tensor, future_target: Tensor + ) -> Tensor: + """ + Parameters + ---------- + F: mx.symbol or mx.ndarray + Gluon function space + past_target: Tensor + FIXME + future_target: Tensor + shape (num_ts, encoder_length, 1) FIXME + + Returns + ------- + loss with shape (FIXME, FIXME) + """ + + enc_output_static, enc_output_dynamic = self.encoder( + past_target, self.feat_static_real, self.past_feat_dynamic_real + ) + + dec_input_static, dec_input_dynamic, _ = self.enc2dec( + enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + ) + + dec_output = self.decoder(dec_input_dynamic, dec_input_static) + dec_dist_output = self.quantile_proj(dec_output) + + loss = self.loss(future_target, dec_dist_output) + return loss.mean(axis=1) + + +class ForkingSeq2SeqTargetPredictionNetwork(ForkingSeq2SeqNetworkBase): + # noinspection PyMethodOverriding + def hybrid_forward(self, F, past_target: Tensor) -> Tensor: + """ + Parameters + ---------- + F: mx.symbol or mx.ndarray + Gluon function space + past_target: Tensor + FIXME + + Returns + ------- + prediction tensor with shape (FIXME, FIXME) + """ + + # FIXME: can we factor out a common prefix in the base network? + + + enc_output_static, enc_output_dynamic = self.encoder( + past_target, self.feat_static_real, self.past_feat_dynamic_real + ) + + enc_output_static = ( + nd_None if enc_output_static is None else enc_output_static + ) + + dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( + enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + ) + + dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) + fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) + fcst_output = F.squeeze(fcst_output, axis=1) + + predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) + return predictions \ No newline at end of file diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index c89854dca1..7dcac51a20 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -48,6 +48,7 @@ def __init__( context_length: Optional[int], prediction_length: int, freq: str, + use_dynamic_feat: bool = False, decoder_mlp_dim_seq: List[int] = [20], quantiles: List[float] = list(), trainer: Trainer = Trainer(), @@ -73,6 +74,7 @@ def __init__( decoder=decoder, quantile_output=quantile_output, freq=freq, + use_dynamic_feat=use_dynamic_feat, prediction_length=prediction_length, context_length=context_length, trainer=trainer, @@ -91,6 +93,7 @@ def __init__( prediction_length: int, freq: str, context_length: Optional[int] = None, + use_dynamic_feat: bool = False, seed: Optional[int] = None, decoder_mlp_dim_seq: List[int] = [20], channels_seq: List[int] = [30, 30, 30], @@ -119,11 +122,13 @@ def __init__( kernel_size_seq=channels_seq, channels_seq=kernel_size_seq, use_residual=use_residual, - use_dynamic_feat=True, + use_dynamic_feat=use_dynamic_feat, prefix="encoder_", ) + super(MQCNNEstimator, self).__init__( encoder=encoder, + use_dynamic_feat=use_dynamic_feat, decoder_mlp_dim_seq=decoder_mlp_dim_seq, freq=freq, prediction_length=prediction_length, @@ -182,6 +187,7 @@ def __init__( for _ in range(1): estimator = MQCNNEstimator( + use_dynamic_feat=True, prediction_length=dataset.metadata.prediction_length, seed=42, freq=dataset.metadata.freq, From 67d94ebe846cad15de8fdd401cd2227e63ae6493 Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Tue, 25 Feb 2020 16:49:56 +0100 Subject: [PATCH 05/44] Added checks for dyn features. --- src/gluonts/block/encoder.py | 6 +- .../model/seq2seq/_forking_estimator.py | 84 ++++++++++--------- src/gluonts/model/seq2seq/_forking_network.py | 68 ++++++++------- 3 files changed, 83 insertions(+), 75 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index 2a3e4ee3a6..9856097601 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -233,9 +233,7 @@ def hybrid_forward( dynamic_features=dynamic_features, ) elif self.use_dynamic_feat: - inputs = F.concat( - target, dynamic_features, dim=2 - ) # (N, T, C) + inputs = F.concat(target, dynamic_features, dim=2) # (N, T, C) else: inputs = target @@ -478,4 +476,4 @@ def hybrid_forward( F.slice_axis(dynamic_code, axis=1, begin=-1, end=None), axis=1 ) - return static_code, dynamic_code \ No newline at end of file + return static_code, dynamic_code diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 1db875730c..a858030b10 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -34,6 +34,7 @@ Chain, TestSplitSampler, Transformation, + VstackFeatures, ) # Relative imports @@ -41,8 +42,11 @@ from ._forking_network import ( ForkingSeq2SeqPredictionNetwork, ForkingSeq2SeqTrainingNetwork, - ForkingSeq2SeqNetwork, ForkingSeq2SeqNetworkBase, ForkingSeq2SeqTargetPredictionNetwork, - ForkingSeq2SeqTargetTrainingNetwork) + ForkingSeq2SeqNetwork, + ForkingSeq2SeqNetworkBase, + ForkingSeq2SeqTargetPredictionNetwork, + ForkingSeq2SeqTargetTrainingNetwork, +) from ._transform import ForkingSequenceSplitter @@ -117,50 +121,52 @@ def __init__( self.context_length = ( context_length if context_length is not None else prediction_length ) + self.add_time_feature = True + + @classmethod + def derive_auto_fields(cls, train_iter): + return {} def create_transformation(self) -> Transformation: + chain = [] + dynamic_feat_fields = [] - if self.use_dynamic_feat: - feat_def = Chain( - trans=[ - AsNumpyArray( - field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype - ), - # AddAgeFeature( - # target_field=FieldName.TARGET, - # output_field=FieldName.FEAT_DYNAMIC_REAL, - # log_scale=True, - # pred_length=self.prediction_length, - # dtype=self.dtype, - # ), + if self.add_time_feature: + chain.append( AddTimeFeatures( start_field=FieldName.START, target_field=FieldName.TARGET, - output_field=FieldName.FEAT_DYNAMIC_REAL, - time_features= time_features_from_frequency_str(self.freq), + output_field="time_feature", + time_features=time_features_from_frequency_str(self.freq), pred_length=self.prediction_length, ), - ForkingSequenceSplitter( - train_sampler=TestSplitSampler(), - enc_len=self.context_length, - dec_len=self.prediction_length, - encoder_series_fields=[FieldName.FEAT_DYNAMIC_REAL], - ), - ]) + ) + dynamic_feat_fields.append("time_feature") + + if self.use_dynamic_feat: + dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) + + if dynamic_feat_fields: + chain.append( + VstackFeatures( + output_field=FieldName.FEAT_TIME, + input_fields=dynamic_feat_fields, + ) + ) + output_field = [FieldName.FEAT_TIME] else: - feat_def = Chain( - trans=[ - AsNumpyArray( - field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype - ), - ForkingSequenceSplitter( - train_sampler=TestSplitSampler(), - enc_len=self.context_length, - dec_len=self.prediction_length, - ), - ]) + output_field = [] + + chain.append( + ForkingSequenceSplitter( + train_sampler=TestSplitSampler(), + enc_len=self.context_length, + dec_len=self.prediction_length, + encoder_series_fields=output_field, + ), + ) - return feat_def + return Chain(chain) def create_training_network(self) -> ForkingSeq2SeqNetworkBase: # return ForkingSeq2SeqTrainingNetwork( @@ -175,10 +181,9 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - use_dynamic_real=self.use_dynamic_feat + use_dynamic_real=self.use_dynamic_feat, ).get_training_network() - def create_predictor( self, transformation: Transformation, @@ -190,13 +195,12 @@ def create_predictor( for quantile in self.quantile_output.quantiles ] - prediction_network = ForkingSeq2SeqNetwork( encoder=trained_network.encoder, enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - use_dynamic_real=self.use_dynamic_feat + use_dynamic_real=self.use_dynamic_feat, ).get_prediction_network() copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index c805f04030..e2b5bfee29 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -67,17 +67,18 @@ def __init__( self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() -class ForkingSeq2SeqNetwork(): + +class ForkingSeq2SeqNetwork: @validated() def __init__( - self, - encoder: Seq2SeqEncoder, - enc2dec: Seq2SeqEnc2Dec, - decoder: Seq2SeqDecoder, - quantile_output: QuantileOutput, - use_dynamic_real: bool = False, - use_static_cat: bool = False, - **kwargs, + self, + encoder: Seq2SeqEncoder, + enc2dec: Seq2SeqEnc2Dec, + decoder: Seq2SeqDecoder, + quantile_output: QuantileOutput, + use_dynamic_real: bool = False, + use_static_cat: bool = False, + **kwargs, ) -> None: self.encoder = encoder self.enc2dec = enc2dec @@ -93,17 +94,17 @@ def get_training_network(self) -> ForkingSeq2SeqNetworkBase: encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, - quantile_output=self.quantile_output + quantile_output=self.quantile_output, ) elif self.use_static_cat is False and self.use_dynamic_real: return ForkingSeq2SeqTrainingNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, - quantile_output=self.quantile_output + quantile_output=self.quantile_output, ) else: - raise("Not implemented yet!") + raise ("Not implemented yet!") def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: if self.use_static_cat is False and self.use_dynamic_real is False: @@ -111,27 +112,27 @@ def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, - quantile_output=self.quantile_output + quantile_output=self.quantile_output, ) elif self.use_static_cat is False and self.use_dynamic_real: return ForkingSeq2SeqPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, - quantile_output=self.quantile_output + quantile_output=self.quantile_output, ) else: - raise("Not implemented yet!") - + raise ("Not implemented yet!") class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( - self, F, - past_target: Tensor, - past_feat_dynamic_real: Tensor, - future_target: Tensor + self, + F, + past_target: Tensor, + past_feat_dynamic_real: Tensor, + future_target: Tensor, ) -> Tensor: """ Parameters @@ -152,13 +153,14 @@ def hybrid_forward( # print(f"past_feat_dynamic_real: {past_feat_dynamic_real.shape}") # print(f"future_target: {future_target.shape}") - enc_output_static, enc_output_dynamic = self.encoder( past_target, self.feat_static_real, past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -172,10 +174,9 @@ def hybrid_forward( class ForkingSeq2SeqPredictionNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding - def hybrid_forward(self, - F, - past_target: Tensor, - past_feat_dynamic_real: Tensor) -> Tensor: + def hybrid_forward( + self, F, past_target: Tensor, past_feat_dynamic_real: Tensor + ) -> Tensor: """ Parameters ---------- @@ -200,7 +201,9 @@ def hybrid_forward(self, ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -236,7 +239,9 @@ def hybrid_forward( ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -264,7 +269,6 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: # FIXME: can we factor out a common prefix in the base network? - enc_output_static, enc_output_dynamic = self.encoder( past_target, self.feat_static_real, self.past_feat_dynamic_real ) @@ -274,7 +278,9 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, self.future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -282,4 +288,4 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: fcst_output = F.squeeze(fcst_output, axis=1) predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) - return predictions \ No newline at end of file + return predictions From 2e8dc066306ff0dde554487f6a40cf92a255ab7d Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Tue, 25 Feb 2020 17:18:26 +0100 Subject: [PATCH 06/44] Fix from_hyperparameters for GluonEstimator. --- src/gluonts/model/estimator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index e18d801763..bf38b0b19e 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -135,7 +135,10 @@ def from_hyperparameters(cls, **hyperparameters) -> "GluonEstimator": ) try: - trainer = from_hyperparameters(Trainer, **hyperparameters) + trainer = hyperparameters.get("trainer") + if not isinstance(trainer, Trainer): + trainer = from_hyperparameters(Trainer, **hyperparameters) + return cls( **Model(**{**hyperparameters, "trainer": trainer}).__dict__ ) From 70f3a255c4f93ca204c86fdd7c38b578a4ddc51e Mon Sep 17 00:00:00 2001 From: Bernie Wang Date: Tue, 25 Feb 2020 18:14:52 +0100 Subject: [PATCH 07/44] enable date and age features, and rts --- src/gluonts/model/estimator.py | 2 +- .../model/seq2seq/_forking_estimator.py | 51 ++++++++++++++----- src/gluonts/model/seq2seq/_forking_network.py | 4 +- .../model/seq2seq/_mq_dnn_estimator.py | 40 ++++++++++++--- src/gluonts/model/seq2seq/_transform.py | 4 +- src/gluonts/shell/train.py | 2 +- 6 files changed, 77 insertions(+), 26 deletions(-) diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index bf38b0b19e..3a5784fce6 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -74,7 +74,7 @@ def derive_auto_fields(cls, train_iter): return {} @classmethod - def from_inputs(cls, train_iter, params): + def from_inputs(cls, train_iter, **params): auto_params = cls.derive_auto_fields(train_iter) return cls.from_hyperparameters(**auto_params, **params) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index a858030b10..77e49e0f47 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -35,6 +35,7 @@ TestSplitSampler, Transformation, VstackFeatures, + RenameFields, ) # Relative imports @@ -100,6 +101,8 @@ def __init__( freq: str, prediction_length: int, use_dynamic_feat: bool = False, + add_time_feature: bool = False, + add_age_feature: bool = False, context_length: Optional[int] = None, trainer: Trainer = Trainer(), ) -> None: @@ -121,11 +124,14 @@ def __init__( self.context_length = ( context_length if context_length is not None else prediction_length ) - self.add_time_feature = True + self.add_time_feature = add_time_feature + self.add_age_feature = add_age_feature - @classmethod - def derive_auto_fields(cls, train_iter): - return {} + # is target only network or not? + self.dynamic_network = ( + use_dynamic_feat or add_time_feature or add_age_feature + ) + print(f"use_dynamic_network: {self.dynamic_network}") def create_transformation(self) -> Transformation: chain = [] @@ -136,33 +142,50 @@ def create_transformation(self) -> Transformation: AddTimeFeatures( start_field=FieldName.START, target_field=FieldName.TARGET, - output_field="time_feature", + output_field=FieldName.FEAT_TIME, time_features=time_features_from_frequency_str(self.freq), pred_length=self.prediction_length, ), ) - dynamic_feat_fields.append("time_feature") + dynamic_feat_fields.append(FieldName.FEAT_TIME) + + if self.add_age_feature: + chain.append( + AddAgeFeature( + target_field=FieldName.TARGET, + output_field=FieldName.FEAT_AGE, + pred_length=self.prediction_length, + ), + ) + dynamic_feat_fields.append(FieldName.FEAT_AGE) if self.use_dynamic_feat: dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) - if dynamic_feat_fields: + if len(dynamic_feat_fields) > 1: chain.append( VstackFeatures( - output_field=FieldName.FEAT_TIME, + output_field=FieldName.FEAT_DYNAMIC_REAL, input_fields=dynamic_feat_fields, ) ) - output_field = [FieldName.FEAT_TIME] - else: - output_field = [] + elif len(dynamic_feat_fields) == 1: + chain.append( + RenameFields( + {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} + ) + ) + + decoder_field = ( + [FieldName.FEAT_DYNAMIC_REAL] if dynamic_feat_fields else [] + ) chain.append( ForkingSequenceSplitter( train_sampler=TestSplitSampler(), enc_len=self.context_length, dec_len=self.prediction_length, - encoder_series_fields=output_field, + encoder_series_fields=decoder_field, ), ) @@ -181,7 +204,7 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - use_dynamic_real=self.use_dynamic_feat, + use_dynamic_real=self.dynamic_network, ).get_training_network() def create_predictor( @@ -200,7 +223,7 @@ def create_predictor( enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - use_dynamic_real=self.use_dynamic_feat, + use_dynamic_real=self.dynamic_network, ).get_prediction_network() copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index e2b5bfee29..a8a0d53970 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -104,7 +104,7 @@ def get_training_network(self) -> ForkingSeq2SeqNetworkBase: quantile_output=self.quantile_output, ) else: - raise ("Not implemented yet!") + raise NotImplementedError def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: if self.use_static_cat is False and self.use_dynamic_real is False: @@ -122,7 +122,7 @@ def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: quantile_output=self.quantile_output, ) else: - raise ("Not implemented yet!") + raise NotImplementedError class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 7dcac51a20..1c905a39cb 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -15,6 +15,7 @@ from typing import List, Optional # First-party imports +from dataset.stat import calculate_dataset_statistics from gluonts.evaluation.backtest import make_evaluation_predictions from gluonts.block.decoder import ForkingMLPDecoder from gluonts.block.encoder import ( @@ -49,6 +50,8 @@ def __init__( prediction_length: int, freq: str, use_dynamic_feat: bool = False, + add_time_feature: bool = False, + add_age_feature: bool = False, decoder_mlp_dim_seq: List[int] = [20], quantiles: List[float] = list(), trainer: Trainer = Trainer(), @@ -75,6 +78,8 @@ def __init__( quantile_output=quantile_output, freq=freq, use_dynamic_feat=use_dynamic_feat, + add_age_feature=add_age_feature, + add_time_feature=add_time_feature, prediction_length=prediction_length, context_length=context_length, trainer=trainer, @@ -93,7 +98,9 @@ def __init__( prediction_length: int, freq: str, context_length: Optional[int] = None, - use_dynamic_feat: bool = False, + use_feat_dynamic_real: bool = False, + add_time_feature: bool = False, + add_age_feature: bool = False, seed: Optional[int] = None, decoder_mlp_dim_seq: List[int] = [20], channels_seq: List[int] = [30, 30, 30], @@ -106,6 +113,11 @@ def __init__( trainer: Trainer = Trainer(), ) -> None: + use_dynamic_feat_cnn = False + + if use_feat_dynamic_real or add_age_feature or add_time_feature: + use_dynamic_feat_cnn = True + if seed: np.random.seed(seed) mx.random.seed(seed) @@ -122,13 +134,15 @@ def __init__( kernel_size_seq=channels_seq, channels_seq=kernel_size_seq, use_residual=use_residual, - use_dynamic_feat=use_dynamic_feat, + use_dynamic_feat=use_dynamic_feat_cnn, prefix="encoder_", ) super(MQCNNEstimator, self).__init__( encoder=encoder, - use_dynamic_feat=use_dynamic_feat, + use_dynamic_feat=use_feat_dynamic_real, + add_time_feature=add_time_feature, + add_age_feature=add_age_feature, decoder_mlp_dim_seq=decoder_mlp_dim_seq, freq=freq, prediction_length=prediction_length, @@ -137,6 +151,16 @@ def __init__( quantiles=quantiles, ) + @classmethod + def derive_auto_fields(cls, train_iter): + stats = calculate_dataset_statistics(train_iter) + + return { + "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, + # "use_feat_static_cat": bool(stats.feat_static_cat), + # "cardinality": [len(cats) for cats in stats.feat_static_cat], + } + class MQRNNEstimator(MQDNNEstimator): """ @@ -186,8 +210,10 @@ def __init__( metrics = [] for _ in range(1): - estimator = MQCNNEstimator( - use_dynamic_feat=True, + estimator = MQCNNEstimator.from_inputs( + dataset.train, + # add_time_feature=True, + # add_age_feature=True, prediction_length=dataset.metadata.prediction_length, seed=42, freq=dataset.metadata.freq, @@ -199,12 +225,14 @@ def __init__( predictor = estimator.train(dataset.train) + assert dataset.test is not None + forecast_it, ts_it = make_evaluation_predictions( dataset.test, predictor=predictor, num_samples=100 ) agg_metrics, item_metrics = Evaluator()( - ts_it, forecast_it, num_series=len(dataset.test) + ts_it, forecast_it, num_series=len(dataset.test) # type: ignore ) metrics.append(agg_metrics["wQuantileLoss[0.5]"]) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index dc0ff84664..72df2a0d8e 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -13,7 +13,7 @@ # Standard library imports from collections import Counter -from typing import Iterator, List +from typing import Iterator, List, Any # Third-party imports import numpy as np @@ -126,7 +126,7 @@ def flatmap_transform( continue if ts_field in decoder_fields: - d3 = () if ts_field == self.target_in else (len(ts),) + d3: Any = () if ts_field == self.target_in else (len(ts),) forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len) + d3 ) diff --git a/src/gluonts/shell/train.py b/src/gluonts/shell/train.py index e561c15ba8..96e090681e 100644 --- a/src/gluonts/shell/train.py +++ b/src/gluonts/shell/train.py @@ -63,7 +63,7 @@ def run_train_and_test( logger.info(f"Using forecaster {forecaster_fq_name} v{forecaster_version}") forecaster = forecaster_type.from_inputs( - env.datasets["train"], env.hyperparameters + env.datasets["train"], **env.hyperparameters ) logger.info( From 1f66ac1715881f8e225cf7a4e5684b8f3da251f1 Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Wed, 26 Feb 2020 11:53:56 +0100 Subject: [PATCH 08/44] Fixup. --- src/gluonts/model/seq2seq/_mq_dnn_estimator.py | 2 +- test/model/seq2seq/test_encoders.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 1c905a39cb..362b89a073 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -15,7 +15,7 @@ from typing import List, Optional # First-party imports -from dataset.stat import calculate_dataset_statistics +from gluonts.dataset.stat import calculate_dataset_statistics from gluonts.evaluation.backtest import make_evaluation_predictions from gluonts.block.decoder import ForkingMLPDecoder from gluonts.block.encoder import ( diff --git a/test/model/seq2seq/test_encoders.py b/test/model/seq2seq/test_encoders.py index 96063864de..2fa9f35e44 100644 --- a/test/model/seq2seq/test_encoders.py +++ b/test/model/seq2seq/test_encoders.py @@ -38,7 +38,7 @@ def test_hierarchical_cnn_encoders(use_residual, hybridize) -> None: dial_seq = [1, 3, 9] cnn = HierarchicalCausalConv1DEncoder( - dial_seq, ks_seq, chl_dim, use_residual, use_dynamic_feat=True + dial_seq, ks_seq, chl_dim, use_residual, use_dynamic_feat=True, use_static_feat=True, ) cnn.collect_params().initialize() @@ -47,4 +47,4 @@ def test_hierarchical_cnn_encoders(use_residual, hybridize) -> None: true_shape = (num_ts, ts_len, 31) if use_residual else (num_ts, ts_len, 30) - assert cnn(test_data, test_static_feat, test_dynamic_feat)[1].shape == true_shape \ No newline at end of file + assert cnn(test_data, test_static_feat, test_dynamic_feat)[1].shape == true_shape From de08785234e0918e7cbcf7fc32a41a77209e453b Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Tue, 31 Mar 2020 10:41:37 +0200 Subject: [PATCH 09/44] xx --- src/gluonts/model/seq2seq/_forking_network.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index f4f962eb7a..0d55a85ce8 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -58,9 +58,9 @@ def __init__( self.decoder = decoder self.quantile_output = quantile_output - self.feat_static_real = nd_None - self.past_feat_dynamic_real = nd_None - self.future_feat_dynamic_real = nd_None + # self.feat_static_real = F.zeros(shape=(1,)) + # self.past_feat_dynamic_real = F.zeros(shape=(1,)) + # self.future_feat_dynamic_real = F.zeros(shape=(1,)) with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() From 2a6d737fc3eec57ace426c3e5a8508c9986cef93 Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Tue, 31 Mar 2020 17:58:22 +0200 Subject: [PATCH 10/44] Fixup. --- src/gluonts/block/encoder.py | 11 ++---- src/gluonts/model/seq2seq/_forking_network.py | 34 +++++++++---------- src/gluonts/model/seq2seq/_transform.py | 2 +- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index 9856097601..f69cc696f2 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -32,10 +32,6 @@ class Seq2SeqEncoder(nn.HybridBlock): a dynamic latent code with the same length as the `target` sequence. """ - @validated() - def __init__(self, **kwargs): - super().__init__(**kwargs) - # noinspection PyMethodOverriding def hybrid_forward( self, @@ -77,7 +73,6 @@ def hybrid_forward( """ raise NotImplementedError - @staticmethod def _assemble_inputs( F, target: Tensor, static_features: Tensor, dynamic_features: Tensor ) -> Tensor: @@ -226,7 +221,7 @@ def hybrid_forward( """ if self.use_dynamic_feat and self.use_static_feat: - inputs = Seq2SeqEncoder._assemble_inputs( + inputs = self._assemble_inputs( F, target=target, static_features=static_features, @@ -383,7 +378,7 @@ def hybrid_forward( shape (batch_size, sequence_length, num_dynamic_features) """ - inputs = Seq2SeqEncoder._assemble_inputs( + inputs = self._assemble_inputs( F, target, static_features, dynamic_features ) static_code = self.model(inputs) @@ -465,7 +460,7 @@ def hybrid_forward( dynamic code, shape (batch_size, sequence_length, num_dynamic_features) """ - inputs = Seq2SeqEncoder._assemble_inputs( + inputs = self._assemble_inputs( F, target, static_features, dynamic_features ) dynamic_code = self.rnn(inputs) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 0d55a85ce8..b497a1d482 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -106,7 +106,7 @@ def get_training_network(self) -> ForkingSeq2SeqNetworkBase: raise NotImplementedError def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: - if self.use_static_cat is False and self.use_dynamic_real is False: + if not self.use_static_cat and not self.use_dynamic_real: return ForkingSeq2SeqTargetPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, @@ -153,15 +153,12 @@ def hybrid_forward( past_feat_dynamic_real = F.zeros(shape=(1,)) future_feat_dynamic_real = F.zeros(shape=(1,)) - enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -193,8 +190,11 @@ def hybrid_forward( # FIXME: can we factor out a common prefix in the base network? + feat_static_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) + enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) enc_output_static = ( @@ -202,9 +202,7 @@ def hybrid_forward( ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -235,14 +233,16 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ + feat_static_real = F.zeros(shape=(1,)) + past_feat_dynamic_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) + enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, self.past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -274,7 +274,7 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: future_feat_dynamic_real = F.zeros(shape=(1,)) enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, self.past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) enc_output_static = ( @@ -284,9 +284,7 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 72df2a0d8e..d8f3564a63 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -130,8 +130,8 @@ def flatmap_transform( forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len) + d3 ) + skip = max(0, self.enc_len - sampling_idx) - skip = max(0, self.enc_len - 1 - sampling_idx) for dec_field, idx in zip( forking_dec_field[skip:], range(start_idx + 1, start_idx + self.enc_len + 1), From 592eb673bb76a4362f2a5b450e5fb4765050d683 Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Wed, 1 Apr 2020 17:38:02 +0200 Subject: [PATCH 11/44] Another fixup. --- src/gluonts/block/encoder.py | 6 +++++- test/model/seq2seq/test_forking_sequence_splitter.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index f69cc696f2..77d7ea4f68 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -74,7 +74,11 @@ def hybrid_forward( raise NotImplementedError def _assemble_inputs( - F, target: Tensor, static_features: Tensor, dynamic_features: Tensor + self, + F, + target: Tensor, + static_features: Tensor, + dynamic_features: Tensor, ) -> Tensor: """ Assemble features from target, static features, and the dynamic diff --git a/test/model/seq2seq/test_forking_sequence_splitter.py b/test/model/seq2seq/test_forking_sequence_splitter.py index 2087f86551..42d2881bc6 100644 --- a/test/model/seq2seq/test_forking_sequence_splitter.py +++ b/test/model/seq2seq/test_forking_sequence_splitter.py @@ -46,7 +46,7 @@ def test_forking_sequence_splitter() -> None: dec_len = 3 trans = transform.Chain( - trans=[ + [ transform.AddAgeFeature( target_field=FieldName.TARGET, output_field="age", @@ -61,7 +61,7 @@ def test_forking_sequence_splitter() -> None: ] ) - out = trans(iter(ds), is_train=True) + out = trans(ds, is_train=True) transformed_data = next(iter(out)) future_target = np.array( From db57304e34d532492741fa2abf116bbba7bf58fd Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 14 Apr 2020 14:23:45 +0200 Subject: [PATCH 12/44] Fixing formatting and tests. --- src/gluonts/model/estimator.py | 2 +- src/gluonts/model/predictor.py | 2 +- src/gluonts/model/seq2seq/_forking_network.py | 5 +++++ test/model/seq2seq/test_encoders.py | 14 ++++++++++---- test/model/seq2seq/test_model.py | 4 ++-- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index f3c95aa17b..b48dfa0e3b 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -12,7 +12,7 @@ # permissions and limitations under the License. # Standard library imports -from typing import NamedTuple, Optional +from typing import NamedTuple, Optional, Iterator # Third-party imports import numpy as np diff --git a/src/gluonts/model/predictor.py b/src/gluonts/model/predictor.py index edd62ea6c3..a3031107de 100644 --- a/src/gluonts/model/predictor.py +++ b/src/gluonts/model/predictor.py @@ -161,7 +161,7 @@ def derive_auto_fields(cls, train_iter): return {} @classmethod - def from_inputs(cls, train_iter, params): + def from_inputs(cls, train_iter, **params): auto_params = cls.derive_auto_fields(train_iter) return cls.from_hyperparameters(**auto_params, **params) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index b497a1d482..d8020e8d05 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -14,6 +14,8 @@ # Third-party imports import mxnet as mx from mxnet import gluon +from mxnet import nd + # First-party imports from gluonts.block.decoder import Seq2SeqDecoder @@ -24,6 +26,9 @@ from gluonts.model.common import Tensor +nd_None = nd.array([]) + + class ForkingSeq2SeqNetworkBase(gluon.HybridBlock): """ Base network for the :class:`ForkingSeq2SeqEstimator`. diff --git a/test/model/seq2seq/test_encoders.py b/test/model/seq2seq/test_encoders.py index 2fa9f35e44..7ac949d6df 100644 --- a/test/model/seq2seq/test_encoders.py +++ b/test/model/seq2seq/test_encoders.py @@ -18,8 +18,6 @@ # First-party imports from gluonts.block.encoder import HierarchicalCausalConv1DEncoder -nd_None = nd.array([]) - @pytest.mark.parametrize("use_residual", [True, False]) @pytest.mark.parametrize("hybridize", [True, False]) @@ -38,7 +36,12 @@ def test_hierarchical_cnn_encoders(use_residual, hybridize) -> None: dial_seq = [1, 3, 9] cnn = HierarchicalCausalConv1DEncoder( - dial_seq, ks_seq, chl_dim, use_residual, use_dynamic_feat=True, use_static_feat=True, + dial_seq, + ks_seq, + chl_dim, + use_residual, + use_dynamic_feat=True, + use_static_feat=True, ) cnn.collect_params().initialize() @@ -47,4 +50,7 @@ def test_hierarchical_cnn_encoders(use_residual, hybridize) -> None: true_shape = (num_ts, ts_len, 31) if use_residual else (num_ts, ts_len, 30) - assert cnn(test_data, test_static_feat, test_dynamic_feat)[1].shape == true_shape + assert ( + cnn(test_data, test_static_feat, test_dynamic_feat)[1].shape + == true_shape + ) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 6c8487e5e1..ca4cc135f4 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -43,9 +43,9 @@ def Estimator(request): @pytest.mark.parametrize("hybridize", [True, False]) def test_accuracy(Estimator, accuracy_test, hyperparameters, hybridize): - hyperparameters.update(num_batches_per_epoch=200, hybridize=hybridize) + hyperparameters.update(num_batches_per_epoch=100, hybridize=hybridize) - accuracy_test(Estimator, hyperparameters, accuracy=0.2) + accuracy_test(Estimator, hyperparameters, accuracy=0.25) def test_repr(Estimator, repr_test, hyperparameters): From ecf31e42df5b7fdce05816a65c78ef6a7376f7ff Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Wed, 15 Apr 2020 14:01:06 +0200 Subject: [PATCH 13/44] A lot of TODOs and comments added. --- src/gluonts/block/decoder.py | 2 ++ src/gluonts/model/estimator.py | 6 ++++ .../model/seq2seq/_forking_estimator.py | 7 +++- src/gluonts/model/seq2seq/_forking_network.py | 13 +++++-- .../model/seq2seq/_mq_dnn_estimator.py | 35 ++++++++++++++----- .../model/seq2seq/_seq2seq_estimator.py | 4 +++ 6 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/gluonts/block/decoder.py b/src/gluonts/block/decoder.py index 9b0e26e28f..69eeeaaff0 100644 --- a/src/gluonts/block/decoder.py +++ b/src/gluonts/block/decoder.py @@ -52,6 +52,7 @@ def hybrid_forward( pass +# TODO: add support for static variables class ForkingMLPDecoder(Seq2SeqDecoder): """ Multilayer perceptron decoder for sequence-to-sequence models. @@ -104,6 +105,7 @@ def __init__( ) self.model.add(layer) + # TODO: add support for static input def hybrid_forward( self, F, dynamic_input: Tensor, static_input: Tensor = None ) -> Tensor: diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index b48dfa0e3b..0668d19ce6 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -75,7 +75,13 @@ def derive_auto_fields(cls, train_iter): @classmethod def from_inputs(cls, train_iter, **params): + # auto_params usually include `use_feat_dynamic_real`, `use_feat_static_cat` and `cardinality` auto_params = cls.derive_auto_fields(train_iter) + # FIXME: probably params should take precedence over auto_params, since they were deliberately set, + # however, on that case this method does not make sense, since if params says `use_feat_dynamic_real`=True + # but `auto_params`=False, then this will lead to an error, since the appropriate data does not exist. + # This the only context in which this method makes sense is when auto_params take precedence, which could + # lead to overwriting of explicit parameters. In this case a warning should be issued. return cls.from_hyperparameters(**auto_params, **params) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 77e49e0f47..eee591bdbb 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -127,6 +127,9 @@ def __init__( self.add_time_feature = add_time_feature self.add_age_feature = add_age_feature + # TODO: refactor this variable name: dynamic_network, in fact it + # is not even necessary as is, because this is how use_dynamic_feat was + # set in MQCNNEstimator and otherwise its not used, i.e. False # is target only network or not? self.dynamic_network = ( use_dynamic_feat or add_time_feature or add_age_feature @@ -172,7 +175,9 @@ def create_transformation(self) -> Transformation: elif len(dynamic_feat_fields) == 1: chain.append( RenameFields( - {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} + { + dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL + } # TODO: find out why this is done ) ) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index d8020e8d05..fc0af163f4 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -72,6 +72,8 @@ def __init__( self.loss = quantile_output.get_loss() +# TODO: THIS SHOULD NOT EXIST, the if else logic should be handled in +# the _forking_estimator.py, and possible assertions too class ForkingSeq2SeqNetwork: @validated() def __init__( @@ -129,6 +131,8 @@ def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: raise NotImplementedError +# TODO: figure out whether we need 2 classes each, in fact we would need 4 each, +# if adding categorical with this technique, does not seem reasonable class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( @@ -155,13 +159,18 @@ def hybrid_forward( # FIXME: can we factor out a common prefix in the base network? feat_static_real = F.zeros(shape=(1,)) - past_feat_dynamic_real = F.zeros(shape=(1,)) + # TODO: THIS IS OVERWRITING THE ARGUMENT?!?! (REMOVING IT makes add time and age feature work): + # past_feat_dynamic_real = F.zeros(shape=(1,)) future_feat_dynamic_real = F.zeros(shape=(1,)) + # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( past_target, feat_static_real, past_feat_dynamic_real ) + # arguments: encoder_output_static, encoder_output_dynamic, future_features + # TODO: figure out how future_features is supposed to be used: since no distinction + # between dynamic and static anymore (shape is (N, T, C) suggesting dynamic feature) dec_input_static, dec_input_dynamic, _ = self.enc2dec( enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) @@ -169,8 +178,6 @@ def hybrid_forward( dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) - # print(f"decoder output: {dec_dist_output.shape}") - loss = self.loss(future_target, dec_dist_output) return loss.mean(axis=1) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 362b89a073..62eb9cb28d 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -12,7 +12,7 @@ # permissions and limitations under the License. # Standard library imports -from typing import List, Optional +from typing import List, Optional, Sized # First-party imports from gluonts.dataset.stat import calculate_dataset_statistics @@ -33,7 +33,13 @@ import numpy as np import mxnet as mx +# TODO: in general, it seems unnecessary to put the MQCNN and MQRNN into Seq2Seq since their commonality in code with +# the rest is just the abstract classes Seq2SeqDecoder and Se2SeqEncoder, +# and the Estimator is not based on Seq2SeqEstimator! + +# TODO: THIS CLASS SHOULD NOT EXIST, the decoder +# can be defined in each current subclass class MQDNNEstimator(ForkingSeq2SeqEstimator): """ Intermediate base class for a Multi-horizon Quantile Deep Neural Network @@ -72,7 +78,7 @@ def __init__( quantile_output = QuantileOutput(quantiles) - super(MQDNNEstimator, self).__init__( + super().__init__( encoder=encoder, decoder=decoder, quantile_output=quantile_output, @@ -86,6 +92,7 @@ def __init__( ) +# TODO: integrate MQDNN, change arguments to non mutable class MQCNNEstimator(MQDNNEstimator): """ An :class:`MQDNNEstimator` with a Convolutional Neural Network (CNN) as an @@ -99,8 +106,12 @@ def __init__( freq: str, context_length: Optional[int] = None, use_feat_dynamic_real: bool = False, - add_time_feature: bool = False, + use_feat_static_cat: bool = False, + cardinality: Optional[List[int]] = None, + # TODO: fix add age and time features, currently dont work + # (might be resolved through commenting out line 161 of _forkin_network.py) add_age_feature: bool = False, + add_time_feature: bool = False, seed: Optional[int] = None, decoder_mlp_dim_seq: List[int] = [20], channels_seq: List[int] = [30, 30, 30], @@ -114,10 +125,13 @@ def __init__( ) -> None: use_dynamic_feat_cnn = False - if use_feat_dynamic_real or add_age_feature or add_time_feature: use_dynamic_feat_cnn = True + use_static_feat_cnn = False + if use_feat_static_cat or use_feat_static_cat: + use_static_feat_cnn = True + if seed: np.random.seed(seed) mx.random.seed(seed) @@ -129,18 +143,20 @@ def __init__( f"{len(dilation_seq)} vs. {len(kernel_size_seq)}" ) + # TODO: figure out whether this needs any additional modification; doesn't seems o encoder = HierarchicalCausalConv1DEncoder( dilation_seq=dilation_seq, kernel_size_seq=channels_seq, channels_seq=kernel_size_seq, use_residual=use_residual, use_dynamic_feat=use_dynamic_feat_cnn, + use_static_feat=use_static_feat_cnn, prefix="encoder_", ) - super(MQCNNEstimator, self).__init__( + super().__init__( encoder=encoder, - use_dynamic_feat=use_feat_dynamic_real, + use_dynamic_feat=use_feat_dynamic_real, # TODO: make use_dynamic_feat this more specific add_time_feature=add_time_feature, add_age_feature=add_age_feature, decoder_mlp_dim_seq=decoder_mlp_dim_seq, @@ -151,6 +167,7 @@ def __init__( quantiles=quantiles, ) + # TODO: does this work? I think this might @classmethod def derive_auto_fields(cls, train_iter): stats = calculate_dataset_statistics(train_iter) @@ -162,6 +179,7 @@ def derive_auto_fields(cls, train_iter): } +# TODO: integrate MQDNN, change arguments to non mutable class MQRNNEstimator(MQDNNEstimator): """ An :class:`MQDNNEstimator` with a Recurrent Neural Network (RNN) as an @@ -185,7 +203,7 @@ def __init__( bidirectional=True, prefix="encoder_", ) - super(MQRNNEstimator, self).__init__( + super().__init__( encoder=encoder, decoder_mlp_dim_seq=decoder_mlp_dim_seq, freq=freq, @@ -196,6 +214,7 @@ def __init__( ) +# TODO: REMOVE THIS if __name__ == "__main__": from gluonts.dataset.repository.datasets import ( get_dataset, @@ -232,7 +251,7 @@ def __init__( ) agg_metrics, item_metrics = Evaluator()( - ts_it, forecast_it, num_series=len(dataset.test) # type: ignore + ts_it, forecast_it, num_series=len(list(dataset.test)) ) metrics.append(agg_metrics["wQuantileLoss[0.5]"]) diff --git a/src/gluonts/model/seq2seq/_seq2seq_estimator.py b/src/gluonts/model/seq2seq/_seq2seq_estimator.py index e543887141..2b3211f161 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_estimator.py +++ b/src/gluonts/model/seq2seq/_seq2seq_estimator.py @@ -45,6 +45,7 @@ from ._seq2seq_network import Seq2SeqPredictionNetwork, Seq2SeqTrainingNetwork +# TODO: fix mutable arguments class Seq2SeqEstimator(GluonEstimator): """ Quantile-Regression Sequence-to-Sequence Estimator @@ -181,6 +182,7 @@ def create_predictor( ) +# TODO: fix mutable arguments class MLP2QRForecaster(Seq2SeqEstimator): @validated() def __init__( @@ -215,6 +217,7 @@ def __init__( ) +# TODO: fix mutable arguments class RNN2QRForecaster(Seq2SeqEstimator): @validated() def __init__( @@ -257,6 +260,7 @@ def __init__( ) +# TODO: fix mutable arguments class CNN2QRForecaster(Seq2SeqEstimator): @validated() def __init__( From dab925c074fb059ab880475ec2e866e252c9d7a4 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Thu, 16 Apr 2020 12:04:24 +0200 Subject: [PATCH 14/44] Merge from production. --- .../model/seq2seq/_forking_estimator.py | 18 +----- src/gluonts/model/seq2seq/_forking_network.py | 64 ++++++++----------- .../model/seq2seq/_mq_dnn_estimator.py | 12 ++-- src/gluonts/model/seq2seq/_transform.py | 2 +- 4 files changed, 34 insertions(+), 62 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index eee591bdbb..e326eed030 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -28,7 +28,6 @@ from gluonts.support.util import copy_parameters from gluonts.trainer import Trainer from gluonts.transform import ( - AsNumpyArray, AddAgeFeature, AddTimeFeatures, Chain, @@ -41,12 +40,8 @@ # Relative imports from gluonts.time_feature import time_features_from_frequency_str from ._forking_network import ( - ForkingSeq2SeqPredictionNetwork, - ForkingSeq2SeqTrainingNetwork, ForkingSeq2SeqNetwork, ForkingSeq2SeqNetworkBase, - ForkingSeq2SeqTargetPredictionNetwork, - ForkingSeq2SeqTargetTrainingNetwork, ) from ._transform import ForkingSequenceSplitter @@ -175,10 +170,8 @@ def create_transformation(self) -> Transformation: elif len(dynamic_feat_fields) == 1: chain.append( RenameFields( - { - dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL - } # TODO: find out why this is done - ) + {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} + ) # TODO: find out why this is done ) decoder_field = ( @@ -197,13 +190,6 @@ def create_transformation(self) -> Transformation: return Chain(chain) def create_training_network(self) -> ForkingSeq2SeqNetworkBase: - # return ForkingSeq2SeqTrainingNetwork( - # encoder=self.encoder, - # enc2dec=PassThroughEnc2Dec(), - # decoder=self.decoder, - # quantile_output=self.quantile_output, - # ) - return ForkingSeq2SeqNetwork( encoder=self.encoder, enc2dec=PassThroughEnc2Dec(), diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index fc0af163f4..026ecdd3d8 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -12,10 +12,7 @@ # permissions and limitations under the License. # Third-party imports -import mxnet as mx -from mxnet import gluon -from mxnet import nd - +from mxnet import gluon, nd # First-party imports from gluonts.block.decoder import Seq2SeqDecoder @@ -25,7 +22,6 @@ from gluonts.core.component import validated from gluonts.model.common import Tensor - nd_None = nd.array([]) @@ -63,17 +59,15 @@ def __init__( self.decoder = decoder self.quantile_output = quantile_output - # self.feat_static_real = F.zeros(shape=(1,)) - # self.past_feat_dynamic_real = F.zeros(shape=(1,)) - # self.future_feat_dynamic_real = F.zeros(shape=(1,)) + self.feat_static_real = nd_None + self.past_feat_dynamic_real = nd_None + self.future_feat_dynamic_real = nd_None with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() -# TODO: THIS SHOULD NOT EXIST, the if else logic should be handled in -# the _forking_estimator.py, and possible assertions too class ForkingSeq2SeqNetwork: @validated() def __init__( @@ -113,7 +107,7 @@ def get_training_network(self) -> ForkingSeq2SeqNetworkBase: raise NotImplementedError def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: - if not self.use_static_cat and not self.use_dynamic_real: + if self.use_static_cat is False and self.use_dynamic_real is False: return ForkingSeq2SeqTargetPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, @@ -157,27 +151,29 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ - # FIXME: can we factor out a common prefix in the base network? - feat_static_real = F.zeros(shape=(1,)) - # TODO: THIS IS OVERWRITING THE ARGUMENT?!?! (REMOVING IT makes add time and age feature work): - # past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) + # print(f"past target: {past_target.shape}") + # print(f"past_feat_dynamic_real: {past_feat_dynamic_real.shape}") + # print(f"future_target: {future_target.shape}") # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, past_feat_dynamic_real ) # arguments: encoder_output_static, encoder_output_dynamic, future_features # TODO: figure out how future_features is supposed to be used: since no distinction # between dynamic and static anymore (shape is (N, T, C) suggesting dynamic feature) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) + # print(f"decoder output: {dec_dist_output.shape}") + loss = self.loss(future_target, dec_dist_output) return loss.mean(axis=1) @@ -202,11 +198,8 @@ def hybrid_forward( # FIXME: can we factor out a common prefix in the base network? - feat_static_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, past_feat_dynamic_real ) enc_output_static = ( @@ -214,7 +207,9 @@ def hybrid_forward( ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real, + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -245,16 +240,14 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ - feat_static_real = F.zeros(shape=(1,)) - past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, self.past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -281,22 +274,19 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: """ # FIXME: can we factor out a common prefix in the base network? - feat_static_real = F.zeros(shape=(1,)) - past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, self.feat_static_real, self.past_feat_dynamic_real ) enc_output_static = ( - F.zeros(shape=(1,)) - if enc_output_static is None - else enc_output_static + nd_None if enc_output_static is None else enc_output_static ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, + enc_output_dynamic, + self.future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 62eb9cb28d..0259ccdb2a 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -12,7 +12,7 @@ # permissions and limitations under the License. # Standard library imports -from typing import List, Optional, Sized +from typing import List, Optional # First-party imports from gluonts.dataset.stat import calculate_dataset_statistics @@ -108,10 +108,8 @@ def __init__( use_feat_dynamic_real: bool = False, use_feat_static_cat: bool = False, cardinality: Optional[List[int]] = None, - # TODO: fix add age and time features, currently dont work - # (might be resolved through commenting out line 161 of _forkin_network.py) - add_age_feature: bool = False, add_time_feature: bool = False, + add_age_feature: bool = False, seed: Optional[int] = None, decoder_mlp_dim_seq: List[int] = [20], channels_seq: List[int] = [30, 30, 30], @@ -146,11 +144,10 @@ def __init__( # TODO: figure out whether this needs any additional modification; doesn't seems o encoder = HierarchicalCausalConv1DEncoder( dilation_seq=dilation_seq, - kernel_size_seq=channels_seq, - channels_seq=kernel_size_seq, + kernel_size_seq=kernel_size_seq, + channels_seq=channels_seq, use_residual=use_residual, use_dynamic_feat=use_dynamic_feat_cnn, - use_static_feat=use_static_feat_cnn, prefix="encoder_", ) @@ -167,7 +164,6 @@ def __init__( quantiles=quantiles, ) - # TODO: does this work? I think this might @classmethod def derive_auto_fields(cls, train_iter): stats = calculate_dataset_statistics(train_iter) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index d8f3564a63..72df2a0d8e 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -130,8 +130,8 @@ def flatmap_transform( forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len) + d3 ) - skip = max(0, self.enc_len - sampling_idx) + skip = max(0, self.enc_len - 1 - sampling_idx) for dec_field, idx in zip( forking_dec_field[skip:], range(start_idx + 1, start_idx + self.enc_len + 1), From 923510bdb881411a9787241dc406e1127a688b95 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Thu, 16 Apr 2020 13:30:53 +0200 Subject: [PATCH 15/44] Fixing mq_dnn single quantile error and type errors. --- src/gluonts/block/quantile_output.py | 9 ++- src/gluonts/model/seq2seq/_forking_network.py | 66 ++++++++++--------- .../model/seq2seq/_mq_dnn_estimator.py | 1 + src/gluonts/model/seq2seq/_transform.py | 2 +- test/model/seq2seq/test_model.py | 9 ++- 5 files changed, 50 insertions(+), 37 deletions(-) diff --git a/src/gluonts/block/quantile_output.py b/src/gluonts/block/quantile_output.py index 9b0fd38950..3ae4f54d62 100644 --- a/src/gluonts/block/quantile_output.py +++ b/src/gluonts/block/quantile_output.py @@ -86,9 +86,12 @@ def hybrid_forward( Tensor weighted sum of the quantile losses, shape N1 x N1 x ... Nk """ - y_pred_all = F.split( - y_pred, axis=-1, num_outputs=self.num_quantiles, squeeze_axis=1 - ) + if self.num_quantiles > 1: + y_pred_all = F.split( + y_pred, axis=-1, num_outputs=self.num_quantiles, squeeze_axis=1 + ) + else: + y_pred_all = [F.squeeze(y_pred, axis=-1)] qt_loss = [] for i, y_pred_q in enumerate(y_pred_all): diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 026ecdd3d8..0d5acc6bc7 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -12,7 +12,9 @@ # permissions and limitations under the License. # Third-party imports -from mxnet import gluon, nd +import mxnet as mx +from mxnet import gluon + # First-party imports from gluonts.block.decoder import Seq2SeqDecoder @@ -22,8 +24,6 @@ from gluonts.core.component import validated from gluonts.model.common import Tensor -nd_None = nd.array([]) - class ForkingSeq2SeqNetworkBase(gluon.HybridBlock): """ @@ -59,15 +59,13 @@ def __init__( self.decoder = decoder self.quantile_output = quantile_output - self.feat_static_real = nd_None - self.past_feat_dynamic_real = nd_None - self.future_feat_dynamic_real = nd_None - with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() +# TODO: THIS SHOULD NOT EXIST, the if else logic should be handled in +# the _forking_estimator.py, and possible assertions too class ForkingSeq2SeqNetwork: @validated() def __init__( @@ -107,7 +105,7 @@ def get_training_network(self) -> ForkingSeq2SeqNetworkBase: raise NotImplementedError def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: - if self.use_static_cat is False and self.use_dynamic_real is False: + if not self.use_static_cat and not self.use_dynamic_real: return ForkingSeq2SeqTargetPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, @@ -151,29 +149,27 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ - # print(f"past target: {past_target.shape}") - # print(f"past_feat_dynamic_real: {past_feat_dynamic_real.shape}") - # print(f"future_target: {future_target.shape}") + # FIXME: can we factor out a common prefix in the base network? + feat_static_real = F.zeros(shape=(1,)) + # TODO: THIS IS OVERWRITING THE ARGUMENT?!?! (REMOVING IT makes add time and age feature work): + # past_feat_dynamic_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) # arguments: encoder_output_static, encoder_output_dynamic, future_features # TODO: figure out how future_features is supposed to be used: since no distinction # between dynamic and static anymore (shape is (N, T, C) suggesting dynamic feature) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) - # print(f"decoder output: {dec_dist_output.shape}") - loss = self.loss(future_target, dec_dist_output) return loss.mean(axis=1) @@ -198,18 +194,21 @@ def hybrid_forward( # FIXME: can we factor out a common prefix in the base network? + feat_static_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) + enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) enc_output_static = ( - nd_None if enc_output_static is None else enc_output_static + F.zeros(shape=(1,)) + if enc_output_static is None + else enc_output_static ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) @@ -240,14 +239,16 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ + feat_static_real = F.zeros(shape=(1,)) + past_feat_dynamic_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) + enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, self.past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_input_dynamic, dec_input_static) @@ -274,19 +275,22 @@ def hybrid_forward(self, F, past_target: Tensor) -> Tensor: """ # FIXME: can we factor out a common prefix in the base network? + feat_static_real = F.zeros(shape=(1,)) + past_feat_dynamic_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) enc_output_static, enc_output_dynamic = self.encoder( - past_target, self.feat_static_real, self.past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic_real ) enc_output_static = ( - nd_None if enc_output_static is None else enc_output_static + F.zeros(shape=(1,)) + if enc_output_static is None + else enc_output_static ) dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, - enc_output_dynamic, - self.future_feat_dynamic_real, + enc_output_static, enc_output_dynamic, future_feat_dynamic_real ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 0259ccdb2a..fdc60fd4ab 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -148,6 +148,7 @@ def __init__( channels_seq=channels_seq, use_residual=use_residual, use_dynamic_feat=use_dynamic_feat_cnn, + # use_static_feat=use_static_feat_cnn, # TODO: enable this prefix="encoder_", ) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 72df2a0d8e..1943e1d0c4 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -131,7 +131,7 @@ def flatmap_transform( shape=(self.enc_len, self.dec_len) + d3 ) - skip = max(0, self.enc_len - 1 - sampling_idx) + skip = max(0, self.enc_len - sampling_idx) for dec_field, idx in zip( forking_dec_field[skip:], range(start_idx + 1, start_idx + self.enc_len + 1), diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index ca4cc135f4..db78e1eddf 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -41,9 +41,14 @@ def Estimator(request): return request.param +@pytest.mark.parametrize("quantiles", [[0.1, 0.5, 0.9], [0.5]]) @pytest.mark.parametrize("hybridize", [True, False]) -def test_accuracy(Estimator, accuracy_test, hyperparameters, hybridize): - hyperparameters.update(num_batches_per_epoch=100, hybridize=hybridize) +def test_accuracy( + Estimator, accuracy_test, hyperparameters, hybridize, quantiles +): + hyperparameters.update( + num_batches_per_epoch=100, hybridize=hybridize, quantiles=quantiles + ) accuracy_test(Estimator, hyperparameters, accuracy=0.25) From e5530955f8a6b4905615534fd0300c4d2b35d12f Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 17 Apr 2020 14:08:13 +0200 Subject: [PATCH 16/44] Refactoring dnn_estimator file. --- .../model/seq2seq/_forking_estimator.py | 33 ++- .../model/seq2seq/_mq_dnn_estimator.py | 201 ++++++------------ 2 files changed, 90 insertions(+), 144 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index e326eed030..5bf56ef1a8 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -101,14 +101,23 @@ def __init__( context_length: Optional[int] = None, trainer: Trainer = Trainer(), ) -> None: + super().__init__(trainer=trainer) + assert ( context_length is None or context_length > 0 ), "The value of `context_length` should be > 0" assert ( prediction_length > 0 ), "The value of `prediction_length` should be > 0" - - super().__init__(trainer=trainer) + # assert (cardinality and use_feat_static_cat) or ( + # not (cardinality or use_feat_static_cat) + # ), "You should set `cardinality` if and only if `use_feat_static_cat=True`" + # assert cardinality is None or all( + # [c > 0 for c in cardinality] + # ), "Elements of `cardinality` should be > 0" + # assert embedding_dimension is None or all( + # [e > 0 for e in embedding_dimension] + # ), "Elements of `embedding_dimension` should be > 0" self.encoder = encoder self.decoder = decoder @@ -122,14 +131,26 @@ def __init__( self.add_time_feature = add_time_feature self.add_age_feature = add_age_feature + # self.use_feat_static_cat = use_feat_static_cat + # self.use_feat_dynamic_real = use_feat_dynamic_real + # self.cardinality = ( + # cardinality if cardinality and use_feat_static_cat else [1] + # ) + # self.embedding_dimension = ( + # embedding_dimension + # if embedding_dimension is not None + # else [min(50, (cat + 1) // 2) for cat in self.cardinality] + # ) + # TODO: refactor this variable name: dynamic_network, in fact it # is not even necessary as is, because this is how use_dynamic_feat was # set in MQCNNEstimator and otherwise its not used, i.e. False # is target only network or not? - self.dynamic_network = ( + self.use_dynamic_real = ( use_dynamic_feat or add_time_feature or add_age_feature ) - print(f"use_dynamic_network: {self.dynamic_network}") + + print(f"use_dynamic_network: {self.use_dynamic_real}") def create_transformation(self) -> Transformation: chain = [] @@ -195,7 +216,7 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - use_dynamic_real=self.dynamic_network, + use_dynamic_real=self.use_dynamic_real, ).get_training_network() def create_predictor( @@ -214,7 +235,7 @@ def create_predictor( enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - use_dynamic_real=self.dynamic_network, + use_dynamic_real=self.use_dynamic_real, ).get_prediction_network() copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index fdc60fd4ab..67246f7f48 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -14,100 +14,42 @@ # Standard library imports from typing import List, Optional +# Third-party imports +import numpy as np +import mxnet as mx + # First-party imports from gluonts.dataset.stat import calculate_dataset_statistics -from gluonts.evaluation.backtest import make_evaluation_predictions from gluonts.block.decoder import ForkingMLPDecoder -from gluonts.block.encoder import ( - HierarchicalCausalConv1DEncoder, - RNNEncoder, - Seq2SeqEncoder, -) +from gluonts.block.encoder import HierarchicalCausalConv1DEncoder, RNNEncoder from gluonts.block.quantile_output import QuantileOutput from gluonts.core.component import validated from gluonts.trainer import Trainer - -# Relative imports from gluonts.model.seq2seq._forking_estimator import ForkingSeq2SeqEstimator -from gluonts.evaluation import Evaluator -import numpy as np -import mxnet as mx + # TODO: in general, it seems unnecessary to put the MQCNN and MQRNN into Seq2Seq since their commonality in code with # the rest is just the abstract classes Seq2SeqDecoder and Se2SeqEncoder, # and the Estimator is not based on Seq2SeqEstimator! -# TODO: THIS CLASS SHOULD NOT EXIST, the decoder -# can be defined in each current subclass -class MQDNNEstimator(ForkingSeq2SeqEstimator): - """ - Intermediate base class for a Multi-horizon Quantile Deep Neural Network - (MQ-DNN), [WTN+17]_. The class fixes the decoder is a multi-quantile MLP. - Subclasses fix the encoder to be either a Convolutional Neural Network - (MQ-CNN) or a Recurrent Neural Network (MQ-RNN). - """ - - @validated() - def __init__( - self, - encoder: Seq2SeqEncoder, - context_length: Optional[int], - prediction_length: int, - freq: str, - use_dynamic_feat: bool = False, - add_time_feature: bool = False, - add_age_feature: bool = False, - decoder_mlp_dim_seq: List[int] = [20], - quantiles: List[float] = list(), - trainer: Trainer = Trainer(), - ) -> None: - context_length = ( - prediction_length if context_length is None else context_length - ) - assert all( - [d > 0 for d in decoder_mlp_dim_seq] - ), "Elements of `mlp_hidden_dimension_seq` should be > 0" - - decoder = ForkingMLPDecoder( - dec_len=prediction_length, - final_dim=decoder_mlp_dim_seq[-1], - hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], - prefix="decoder_", - ) - - quantile_output = QuantileOutput(quantiles) - - super().__init__( - encoder=encoder, - decoder=decoder, - quantile_output=quantile_output, - freq=freq, - use_dynamic_feat=use_dynamic_feat, - add_age_feature=add_age_feature, - add_time_feature=add_time_feature, - prediction_length=prediction_length, - context_length=context_length, - trainer=trainer, - ) - - # TODO: integrate MQDNN, change arguments to non mutable -class MQCNNEstimator(MQDNNEstimator): +class MQCNNEstimator(ForkingSeq2SeqEstimator): """ An :class:`MQDNNEstimator` with a Convolutional Neural Network (CNN) as an - encoder. Implements the MQ-CNN Forecaster, proposed in [WTN+17]_. + encoder and a multi-quantile MLP as a decoder. Implements the MQ-CNN Forecaster, proposed in [WTN+17]_. """ @validated() def __init__( self, - prediction_length: int, freq: str, + prediction_length: int, context_length: Optional[int] = None, use_feat_dynamic_real: bool = False, use_feat_static_cat: bool = False, - cardinality: Optional[List[int]] = None, + cardinality: List[int] = None, + embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, seed: Optional[int] = None, @@ -122,14 +64,6 @@ def __init__( trainer: Trainer = Trainer(), ) -> None: - use_dynamic_feat_cnn = False - if use_feat_dynamic_real or add_age_feature or add_time_feature: - use_dynamic_feat_cnn = True - - use_static_feat_cnn = False - if use_feat_static_cat or use_feat_static_cat: - use_static_feat_cnn = True - if seed: np.random.seed(seed) mx.random.seed(seed) @@ -140,29 +74,47 @@ def __init__( f"mismatch CNN configurations: {len(channels_seq)} vs. " f"{len(dilation_seq)} vs. {len(kernel_size_seq)}" ) + assert ( + prediction_length > 0 + ), f"Invalid prediction length: {prediction_length}." + assert all( + [d > 0 for d in decoder_mlp_dim_seq] + ), "Elements of `mlp_hidden_dimension_seq` should be > 0" + + use_dynamic_feat = ( + use_feat_dynamic_real or add_age_feature or add_time_feature + ) - # TODO: figure out whether this needs any additional modification; doesn't seems o encoder = HierarchicalCausalConv1DEncoder( dilation_seq=dilation_seq, kernel_size_seq=kernel_size_seq, channels_seq=channels_seq, use_residual=use_residual, - use_dynamic_feat=use_dynamic_feat_cnn, - # use_static_feat=use_static_feat_cnn, # TODO: enable this + use_dynamic_feat=use_dynamic_feat, + use_static_feat=use_feat_static_cat, prefix="encoder_", ) + decoder = ForkingMLPDecoder( + dec_len=prediction_length, + final_dim=decoder_mlp_dim_seq[-1], + hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], + prefix="decoder_", + ) + + quantile_output = QuantileOutput(quantiles) + super().__init__( encoder=encoder, - use_dynamic_feat=use_feat_dynamic_real, # TODO: make use_dynamic_feat this more specific - add_time_feature=add_time_feature, - add_age_feature=add_age_feature, - decoder_mlp_dim_seq=decoder_mlp_dim_seq, + decoder=decoder, + quantile_output=quantile_output, freq=freq, prediction_length=prediction_length, - trainer=trainer, context_length=context_length, - quantiles=quantiles, + use_dynamic_feat=use_dynamic_feat, + add_time_feature=add_time_feature, + add_age_feature=add_age_feature, + trainer=trainer, ) @classmethod @@ -171,16 +123,16 @@ def derive_auto_fields(cls, train_iter): return { "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, - # "use_feat_static_cat": bool(stats.feat_static_cat), - # "cardinality": [len(cats) for cats in stats.feat_static_cat], + "use_feat_static_cat": bool(stats.feat_static_cat), + "cardinality": [len(cats) for cats in stats.feat_static_cat], } # TODO: integrate MQDNN, change arguments to non mutable -class MQRNNEstimator(MQDNNEstimator): +class MQRNNEstimator(ForkingSeq2SeqEstimator): """ An :class:`MQDNNEstimator` with a Recurrent Neural Network (RNN) as an - encoder. Implements the MQ-RNN Forecaster, proposed in [WTN+17]_. + encoder and a multi-quantile MLP as a decoder. Implements the MQ-RNN Forecaster, proposed in [WTN+17]_. """ @validated() @@ -193,6 +145,14 @@ def __init__( trainer: Trainer = Trainer(), quantiles: List[float] = list([0.1, 0.5, 0.9]), ) -> None: + + assert ( + prediction_length > 0 + ), f"Invalid prediction length: {prediction_length}." + assert all( + [d > 0 for d in decoder_mlp_dim_seq] + ), "Elements of `mlp_hidden_dimension_seq` should be > 0" + encoder = RNNEncoder( mode="gru", hidden_size=50, @@ -200,57 +160,22 @@ def __init__( bidirectional=True, prefix="encoder_", ) + + decoder = ForkingMLPDecoder( + dec_len=prediction_length, + final_dim=decoder_mlp_dim_seq[-1], + hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], + prefix="decoder_", + ) + + quantile_output = QuantileOutput(quantiles) + super().__init__( encoder=encoder, - decoder_mlp_dim_seq=decoder_mlp_dim_seq, + decoder=decoder, + quantile_output=quantile_output, freq=freq, prediction_length=prediction_length, - trainer=trainer, context_length=context_length, - quantiles=quantiles, - ) - - -# TODO: REMOVE THIS -if __name__ == "__main__": - from gluonts.dataset.repository.datasets import ( - get_dataset, - dataset_recipes, - ) - - print(f"datasets available: {dataset_recipes.keys()}") - - # we pick m4_hourly as it only contains a few hundred time series - dataset = get_dataset("m4_hourly", regenerate=False) - - metrics = [] - - for _ in range(1): - estimator = MQCNNEstimator.from_inputs( - dataset.train, - # add_time_feature=True, - # add_age_feature=True, - prediction_length=dataset.metadata.prediction_length, - seed=42, - freq=dataset.metadata.freq, - quantiles=[0.5], - trainer=Trainer( - epochs=1, num_batches_per_epoch=10, hybridize=True - ), - ) - - predictor = estimator.train(dataset.train) - - assert dataset.test is not None - - forecast_it, ts_it = make_evaluation_predictions( - dataset.test, predictor=predictor, num_samples=100 - ) - - agg_metrics, item_metrics = Evaluator()( - ts_it, forecast_it, num_series=len(list(dataset.test)) + trainer=trainer, ) - - metrics.append(agg_metrics["wQuantileLoss[0.5]"]) - - print(metrics) From 1ba58718118775fdca85baed2795886bd8766aae Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 17 Apr 2020 17:48:20 +0200 Subject: [PATCH 17/44] Adding additional tests, minor bugfix. --- .../model/seq2seq/_forking_estimator.py | 37 ++++++++++++++++++- src/gluonts/model/seq2seq/_forking_network.py | 15 ++++---- .../model/seq2seq/_mq_dnn_estimator.py | 6 +-- test/model/seq2seq/test_model.py | 34 ++++++++++++++++- 4 files changed, 78 insertions(+), 14 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 5bf56ef1a8..2b086a1acd 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -35,6 +35,8 @@ Transformation, VstackFeatures, RenameFields, + SetField, + RemoveFields, ) # Relative imports @@ -153,6 +155,12 @@ def __init__( print(f"use_dynamic_network: {self.use_dynamic_real}") def create_transformation(self) -> Transformation: + # remove_field_names = [FieldName.FEAT_DYNAMIC_CAT] + # if not self.use_feat_static_real: + # remove_field_names.append(FieldName.FEAT_STATIC_REAL) + # if not self.use_feat_dynamic_real: + # remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL) + chain = [] dynamic_feat_fields = [] @@ -188,18 +196,43 @@ def create_transformation(self) -> Transformation: input_fields=dynamic_feat_fields, ) ) - elif len(dynamic_feat_fields) == 1: + elif ( + len(dynamic_feat_fields) == 1 + and FieldName.FEAT_DYNAMIC_REAL not in dynamic_feat_fields + ): chain.append( RenameFields( {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} - ) # TODO: find out why this is done + ) ) + # TODO: current problem: cannot have no input, if some input provided, because the decoder will not + # accept input, however, the batches contain input, and python complains that + # it cannot map something to nothing? + + # if dynamic_feat_fields: + # chain.append( + # VstackFeatures( + # output_field=FieldName.FEAT_DYNAMIC_REAL, + # input_fields=dynamic_feat_fields, + # ) + # ) + # else: + # # Unfortunately we always need to pass something. + # # Passing a constant does not have an effect on performance and essentially acts as a bias term. + # SetField( + # output_field=FieldName.FEAT_DYNAMIC_REAL, value=[0.0] + # ) + # dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) + + # So far the decoder only uses dynamic real decoder_field = ( [FieldName.FEAT_DYNAMIC_REAL] if dynamic_feat_fields else [] ) chain.append( + # because of how the forking decoder works, every time step + # in context is used for splitting, which is why we use the TestSplitSampler ForkingSequenceSplitter( train_sampler=TestSplitSampler(), enc_len=self.context_length, diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 0d5acc6bc7..c01ef4c3cf 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -86,41 +86,40 @@ def __init__( self.use_dynamic_real = use_dynamic_real self.use_static_cat = use_static_cat + # TODO: add this feature: + assert self.use_static_cat is False + def get_training_network(self) -> ForkingSeq2SeqNetworkBase: - if self.use_static_cat is False and self.use_dynamic_real is False: + if self.use_dynamic_real is False: return ForkingSeq2SeqTargetTrainingNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, quantile_output=self.quantile_output, ) - elif self.use_static_cat is False and self.use_dynamic_real: + else: return ForkingSeq2SeqTrainingNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, quantile_output=self.quantile_output, ) - else: - raise NotImplementedError def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: - if not self.use_static_cat and not self.use_dynamic_real: + if self.use_dynamic_real is False: return ForkingSeq2SeqTargetPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, quantile_output=self.quantile_output, ) - elif self.use_static_cat is False and self.use_dynamic_real: + else: return ForkingSeq2SeqPredictionNetwork( encoder=self.encoder, enc2dec=self.enc2dec, decoder=self.decoder, quantile_output=self.quantile_output, ) - else: - raise NotImplementedError # TODO: figure out whether we need 2 classes each, in fact we would need 4 each, diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 67246f7f48..96cd710791 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -91,7 +91,7 @@ def __init__( channels_seq=channels_seq, use_residual=use_residual, use_dynamic_feat=use_dynamic_feat, - use_static_feat=use_feat_static_cat, + # use_static_feat=use_feat_static_cat, prefix="encoder_", ) @@ -123,8 +123,8 @@ def derive_auto_fields(cls, train_iter): return { "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, - "use_feat_static_cat": bool(stats.feat_static_cat), - "cardinality": [len(cats) for cats in stats.feat_static_cat], + # "use_feat_static_cat": bool(stats.feat_static_cat), + # "cardinality": [len(cats) for cats in stats.feat_static_cat], } diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index db78e1eddf..4502cf5969 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -16,8 +16,8 @@ from gluonts.model.seq2seq import ( MQCNNEstimator, MQRNNEstimator, - Seq2SeqEstimator, ) +from gluonts.testutil.dummy_datasets import make_dummy_datasets_with_features @pytest.fixture() @@ -53,6 +53,38 @@ def test_accuracy( accuracy_test(Estimator, hyperparameters, accuracy=0.25) +@pytest.mark.parametrize("use_feat_dynamic_real", [True, False]) +@pytest.mark.parametrize("add_time_feature", [True, False]) +@pytest.mark.parametrize("add_age_feature", [True, False]) +def test_mqcnn_covariate_smoke_test( + use_feat_dynamic_real, add_time_feature, add_age_feature +): + hps = { + "seed": 42, + "freq": "D", + "prediction_length": 3, + "quantiles": [0.5, 0.1], + "epochs": 3, + "num_batches_per_epoch": 3, + "use_feat_dynamic_real": use_feat_dynamic_real, + "add_time_feature": add_time_feature, + "add_age_feature": add_age_feature, + } + + dataset_train, dataset_test = make_dummy_datasets_with_features( + cardinality=[3, 10, 42], + num_feat_dynamic_real=3, + freq=hps["freq"], + prediction_length=hps["prediction_length"], + ) + + estimator = MQCNNEstimator.from_hyperparameters(**hps) + + predictor = estimator.train(dataset_train) + forecasts = list(predictor.predict(dataset_test)) + assert len(forecasts) == len(dataset_test) + + def test_repr(Estimator, repr_test, hyperparameters): repr_test(Estimator, hyperparameters) From 8ba66e0e2c016d21f5defbac270583f094e83c4c Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 17 Apr 2020 23:25:45 +0200 Subject: [PATCH 18/44] Major refactoring that allows for disabling inputs at will. All tests pass. --- src/gluonts/block/decoder.py | 4 +- src/gluonts/block/encoder.py | 117 +++--------- src/gluonts/dataset/field_names.py | 2 + .../model/seq2seq/_forking_estimator.py | 108 ++++++----- src/gluonts/model/seq2seq/_forking_network.py | 168 +++--------------- .../model/seq2seq/_mq_dnn_estimator.py | 12 +- .../model/seq2seq/_seq2seq_estimator.py | 6 +- src/gluonts/model/seq2seq/_transform.py | 4 + src/gluonts/transform/field.py | 10 +- test/model/seq2seq/test_model.py | 1 + 10 files changed, 133 insertions(+), 299 deletions(-) diff --git a/src/gluonts/block/decoder.py b/src/gluonts/block/decoder.py index 69eeeaaff0..deabc87175 100644 --- a/src/gluonts/block/decoder.py +++ b/src/gluonts/block/decoder.py @@ -52,7 +52,7 @@ def hybrid_forward( pass -# TODO: add support for static variables +# TODO: add support for static variables at some point class ForkingMLPDecoder(Seq2SeqDecoder): """ Multilayer perceptron decoder for sequence-to-sequence models. @@ -105,7 +105,7 @@ def __init__( ) self.model.add(layer) - # TODO: add support for static input + # TODO: add support for static input at some point def hybrid_forward( self, F, dynamic_input: Tensor, static_input: Tensor = None ) -> Tensor: diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index 77d7ea4f68..f3f7f23ce2 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -236,6 +236,8 @@ def hybrid_forward( else: inputs = target + print("Been here done that.") + # NTC -> NCT (or NCW) ct = inputs.swapaxes(1, 2) ct = self.cnn(ct) @@ -248,12 +250,15 @@ def hybrid_forward( # return the last state as the static code static_code = F.slice_axis(ct, axis=1, begin=-1, end=None) static_code = F.squeeze(static_code, axis=1) + + print("Been here done that. 2.") + return static_code, ct class RNNEncoder(Seq2SeqEncoder): """ - Defines an RNN as the encoder. + Defines RNN encoder that uses covariates and target as input to the RNN if desired. Parameters ---------- @@ -278,12 +283,20 @@ def __init__( hidden_size: int, num_layers: int, bidirectional: bool, + use_static_feat: bool = False, + use_dynamic_feat: bool = False, **kwargs, ) -> None: assert num_layers > 0, "`num_layers` value must be greater than zero" assert hidden_size > 0, "`hidden_size` value must be greater than zero" super().__init__(**kwargs) + self.mode = mode + self.hidden_size = hidden_size + self.num_layers = num_layers + self.bidirectional = bidirectional + self.use_static_feat = use_static_feat + self.use_dynamic_feat = use_dynamic_feat with self.name_scope(): self.rnn = RNN(mode, hidden_size, num_layers, bidirectional) @@ -324,7 +337,19 @@ def hybrid_forward( dynamic code, shape (batch_size, sequence_length, num_dynamic_features) """ - dynamic_code = self.rnn(target) + if self.use_dynamic_feat and self.use_static_feat: + inputs = self._assemble_inputs( + F, + target=target, + static_features=static_features, + dynamic_features=dynamic_features, + ) + elif self.use_dynamic_feat: + inputs = F.concat(target, dynamic_features, dim=2) # (N, T, C) + else: + inputs = target + + dynamic_code = self.rnn(inputs) static_code = F.slice_axis(dynamic_code, axis=1, begin=-1, end=None) return static_code, dynamic_code @@ -388,91 +413,3 @@ def hybrid_forward( static_code = self.model(inputs) dynamic_code = F.zeros_like(target).expand_dims(2) return static_code, dynamic_code - - -class RNNCovariateEncoder(Seq2SeqEncoder): - """ - Defines RNN encoder that uses covariates and target as input to the RNN. - - Parameters - ---------- - mode - type of the RNN. Can be either: rnn_relu (RNN with relu activation), - rnn_tanh, (RNN with tanh activation), lstm or gru. - - hidden_size - number of units per hidden layer. - - num_layers - number of hidden layers. - - bidirectional - toggle use of bi-directional RNN as encoder. - """ - - @validated() - def __init__( - self, - mode: str, - hidden_size: int, - num_layers: int, - bidirectional: bool, - **kwargs, - ) -> None: - - assert num_layers > 0, "`num_layers` value must be greater than zero" - assert hidden_size > 0, "`hidden_size` value must be greater than zero" - - super().__init__(**kwargs) - - with self.name_scope(): - self.rnn = RNN(mode, hidden_size, num_layers, bidirectional) - - def hybrid_forward( - self, - F, - target: Tensor, - static_features: Tensor, - dynamic_features: Tensor, - ) -> Tuple[Tensor, Tensor]: - """ - Parameters - ---------- - F - A module that can either refer to the Symbol API or the NDArray - API in MXNet. - - target - target time series, - shape (batch_size, sequence_length, 1) - - static_features - static features, - shape (batch_size, num_static_features) - - dynamic_features - dynamic_features, - shape (batch_size, sequence_length, num_dynamic_features) - - Returns - ------- - Tensor - static code, - shape (batch_size, num_static_features) - - Tensor - dynamic code, - shape (batch_size, sequence_length, num_dynamic_features) - """ - inputs = self._assemble_inputs( - F, target, static_features, dynamic_features - ) - dynamic_code = self.rnn(inputs) - - # using the last state as the static code, - # but not working as well as the concat of all the previous states - static_code = F.squeeze( - F.slice_axis(dynamic_code, axis=1, begin=-1, end=None), axis=1 - ) - - return static_code, dynamic_code diff --git a/src/gluonts/dataset/field_names.py b/src/gluonts/dataset/field_names.py index b7419de9dd..342c397a17 100644 --- a/src/gluonts/dataset/field_names.py +++ b/src/gluonts/dataset/field_names.py @@ -28,6 +28,8 @@ class FieldName: FEAT_DYNAMIC_CAT = "feat_dynamic_cat" FEAT_DYNAMIC_REAL = "feat_dynamic_real" + # TODO: maybe add FEAT_DYNAMIC = "feat_dynamic" + FEAT_TIME = "time_feat" FEAT_CONST = "feat_dynamic_const" FEAT_AGE = "feat_dynamic_age" diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 2b086a1acd..9cad2d139f 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -27,6 +27,7 @@ from gluonts.model.forecast_generator import QuantileForecastGenerator from gluonts.support.util import copy_parameters from gluonts.trainer import Trainer +from gluonts.time_feature import time_features_from_frequency_str from gluonts.transform import ( AddAgeFeature, AddTimeFeatures, @@ -35,15 +36,15 @@ Transformation, VstackFeatures, RenameFields, - SetField, + AddConstFeature, RemoveFields, ) # Relative imports -from gluonts.time_feature import time_features_from_frequency_str from ._forking_network import ( - ForkingSeq2SeqNetwork, ForkingSeq2SeqNetworkBase, + ForkingSeq2SeqTrainingNetwork, + ForkingSeq2SeqPredictionNetwork, ) from ._transform import ForkingSequenceSplitter @@ -97,7 +98,7 @@ def __init__( quantile_output: QuantileOutput, freq: str, prediction_length: int, - use_dynamic_feat: bool = False, + use_feat_dynamic_real: bool = False, add_time_feature: bool = False, add_age_feature: bool = False, context_length: Optional[int] = None, @@ -124,17 +125,21 @@ def __init__( self.encoder = encoder self.decoder = decoder self.quantile_output = quantile_output - self.prediction_length = prediction_length self.freq = freq - self.use_dynamic_feat = use_dynamic_feat + self.prediction_length = prediction_length self.context_length = ( - context_length if context_length is not None else prediction_length + context_length + if context_length is not None + else self.prediction_length ) + self.use_feat_dynamic_real = use_feat_dynamic_real self.add_time_feature = add_time_feature self.add_age_feature = add_age_feature + self.use_dynamic_feat = ( + use_feat_dynamic_real or add_age_feature or add_time_feature + ) # self.use_feat_static_cat = use_feat_static_cat - # self.use_feat_dynamic_real = use_feat_dynamic_real # self.cardinality = ( # cardinality if cardinality and use_feat_static_cat else [1] # ) @@ -147,17 +152,14 @@ def __init__( # TODO: refactor this variable name: dynamic_network, in fact it # is not even necessary as is, because this is how use_dynamic_feat was # set in MQCNNEstimator and otherwise its not used, i.e. False - # is target only network or not? - self.use_dynamic_real = ( - use_dynamic_feat or add_time_feature or add_age_feature - ) - - print(f"use_dynamic_network: {self.use_dynamic_real}") + # # is target only network or not? + # self.use_dynamic_real = ( + # use_dynamic_feat or add_time_feature or add_age_feature or True # TODO: fix this + # ) + # + # print(f"use_dynamic_network: {self.use_dynamic_real}") def create_transformation(self) -> Transformation: - # remove_field_names = [FieldName.FEAT_DYNAMIC_CAT] - # if not self.use_feat_static_real: - # remove_field_names.append(FieldName.FEAT_STATIC_REAL) # if not self.use_feat_dynamic_real: # remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL) @@ -186,9 +188,31 @@ def create_transformation(self) -> Transformation: ) dynamic_feat_fields.append(FieldName.FEAT_AGE) - if self.use_dynamic_feat: + # TODO: there may have been a bug here + if self.use_feat_dynamic_real: + print("NO IM HERE") dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) + else: + print("IM HERE") + chain.append( + RemoveFields(field_names=[FieldName.FEAT_DYNAMIC_REAL]) + ) + + # we need to make sure that there is always some dynamic input + # we will however disregard it in the hybrid forward + if len(dynamic_feat_fields) == 0: + chain.append( + AddConstFeature( + target_field=FieldName.TARGET, + output_field=FieldName.FEAT_CONST, + pred_length=self.prediction_length, + ), + ) + dynamic_feat_fields.append(FieldName.FEAT_CONST) + # now we map all the dynamic input onto FieldName.FEAT_DYNAMIC_REAL + # TODO: change the field from FieldName.FEAT_DYNAMIC_REAL to FieldName.FEAT_TIME for consistency with deepAR + # or to FieldName.FEAT_DYNAMIC, which would have to be added if len(dynamic_feat_fields) > 1: chain.append( VstackFeatures( @@ -200,35 +224,17 @@ def create_transformation(self) -> Transformation: len(dynamic_feat_fields) == 1 and FieldName.FEAT_DYNAMIC_REAL not in dynamic_feat_fields ): + print("ONLY HAVE DYNAMIC REAL") chain.append( RenameFields( {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} ) ) - - # TODO: current problem: cannot have no input, if some input provided, because the decoder will not - # accept input, however, the batches contain input, and python complains that - # it cannot map something to nothing? - - # if dynamic_feat_fields: - # chain.append( - # VstackFeatures( - # output_field=FieldName.FEAT_DYNAMIC_REAL, - # input_fields=dynamic_feat_fields, - # ) - # ) - # else: - # # Unfortunately we always need to pass something. - # # Passing a constant does not have an effect on performance and essentially acts as a bias term. - # SetField( - # output_field=FieldName.FEAT_DYNAMIC_REAL, value=[0.0] - # ) - # dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) - - # So far the decoder only uses dynamic real - decoder_field = ( - [FieldName.FEAT_DYNAMIC_REAL] if dynamic_feat_fields else [] - ) + else: + print( + "IM NAUGHTY?: ", + FieldName.FEAT_DYNAMIC_REAL in dynamic_feat_fields, + ) chain.append( # because of how the forking decoder works, every time step @@ -237,20 +243,22 @@ def create_transformation(self) -> Transformation: train_sampler=TestSplitSampler(), enc_len=self.context_length, dec_len=self.prediction_length, - encoder_series_fields=decoder_field, + encoder_series_fields=[ + FieldName.FEAT_DYNAMIC_REAL + ], # TODO: later add categorical too ), ) return Chain(chain) def create_training_network(self) -> ForkingSeq2SeqNetworkBase: - return ForkingSeq2SeqNetwork( + return ForkingSeq2SeqTrainingNetwork( encoder=self.encoder, enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - use_dynamic_real=self.use_dynamic_real, - ).get_training_network() + use_dynamic_feat=self.use_dynamic_feat, + ) def create_predictor( self, @@ -263,13 +271,15 @@ def create_predictor( for quantile in self.quantile_output.quantiles ] - prediction_network = ForkingSeq2SeqNetwork( + print("TOTALLY FINE THUS FAR P1") + + prediction_network = ForkingSeq2SeqPredictionNetwork( encoder=trained_network.encoder, enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - use_dynamic_real=self.use_dynamic_real, - ).get_prediction_network() + use_dynamic_feat=trained_network.use_dynamic_feat, + ) copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index c01ef4c3cf..f8eb188c7d 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -50,6 +50,8 @@ def __init__( enc2dec: Seq2SeqEnc2Dec, decoder: Seq2SeqDecoder, quantile_output: QuantileOutput, + use_dynamic_feat: bool, + # use_static_feat: bool, **kwargs, ) -> None: super().__init__(**kwargs) @@ -58,70 +60,14 @@ def __init__( self.enc2dec = enc2dec self.decoder = decoder self.quantile_output = quantile_output + self.use_dynamic_feat = use_dynamic_feat + # self.use_static_feat = use_static_feat with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() -# TODO: THIS SHOULD NOT EXIST, the if else logic should be handled in -# the _forking_estimator.py, and possible assertions too -class ForkingSeq2SeqNetwork: - @validated() - def __init__( - self, - encoder: Seq2SeqEncoder, - enc2dec: Seq2SeqEnc2Dec, - decoder: Seq2SeqDecoder, - quantile_output: QuantileOutput, - use_dynamic_real: bool = False, - use_static_cat: bool = False, - **kwargs, - ) -> None: - self.encoder = encoder - self.enc2dec = enc2dec - self.decoder = decoder - self.quantile_output = quantile_output - - self.use_dynamic_real = use_dynamic_real - self.use_static_cat = use_static_cat - - # TODO: add this feature: - assert self.use_static_cat is False - - def get_training_network(self) -> ForkingSeq2SeqNetworkBase: - if self.use_dynamic_real is False: - return ForkingSeq2SeqTargetTrainingNetwork( - encoder=self.encoder, - enc2dec=self.enc2dec, - decoder=self.decoder, - quantile_output=self.quantile_output, - ) - else: - return ForkingSeq2SeqTrainingNetwork( - encoder=self.encoder, - enc2dec=self.enc2dec, - decoder=self.decoder, - quantile_output=self.quantile_output, - ) - - def get_prediction_network(self) -> ForkingSeq2SeqNetworkBase: - if self.use_dynamic_real is False: - return ForkingSeq2SeqTargetPredictionNetwork( - encoder=self.encoder, - enc2dec=self.enc2dec, - decoder=self.decoder, - quantile_output=self.quantile_output, - ) - else: - return ForkingSeq2SeqPredictionNetwork( - encoder=self.encoder, - enc2dec=self.enc2dec, - decoder=self.decoder, - quantile_output=self.quantile_output, - ) - - # TODO: figure out whether we need 2 classes each, in fact we would need 4 each, # if adding categorical with this technique, does not seem reasonable class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): @@ -148,10 +94,12 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ - # FIXME: can we factor out a common prefix in the base network? + print("TOTALLY FINE SO FAR") + feat_static_real = F.zeros(shape=(1,)) - # TODO: THIS IS OVERWRITING THE ARGUMENT?!?! (REMOVING IT makes add time and age feature work): - # past_feat_dynamic_real = F.zeros(shape=(1,)) + # TODO: Required to be commented out for shape inference... + # if not self.use_dynamic_feat: + # past_feat_dynamic_real = F.zeros(shape=(1,)) future_feat_dynamic_real = F.zeros(shape=(1,)) # arguments: target, static_features, dynamic_features @@ -159,6 +107,8 @@ def hybrid_forward( past_target, feat_static_real, past_feat_dynamic_real ) + print("TOTALLY FINE SO FAR 2") + # arguments: encoder_output_static, encoder_output_dynamic, future_features # TODO: figure out how future_features is supposed to be used: since no distinction # between dynamic and static anymore (shape is (N, T, C) suggesting dynamic feature) @@ -169,7 +119,11 @@ def hybrid_forward( dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) + print("TOTALLY FINE SO FAR 3") + loss = self.loss(future_target, dec_dist_output) + + print("TOTALLY FINE SO FAR 4") return loss.mean(axis=1) @@ -191,105 +145,31 @@ def hybrid_forward( prediction tensor with shape (FIXME, FIXME) """ - # FIXME: can we factor out a common prefix in the base network? + print("TOTALLY FINE SO FAR 5") feat_static_real = F.zeros(shape=(1,)) + # TODO: Required to be commented out for shape inference... + # if not self.use_dynamic_feat: + # past_feat_dynamic_real = F.zeros(shape=(1,)) future_feat_dynamic_real = F.zeros(shape=(1,)) - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real - ) - - enc_output_static = ( - F.zeros(shape=(1,)) - if enc_output_static is None - else enc_output_static - ) - - dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real, - ) - - dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) - fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) - fcst_output = F.squeeze(fcst_output, axis=1) - - predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) - return predictions - - -class ForkingSeq2SeqTargetTrainingNetwork(ForkingSeq2SeqNetworkBase): - # noinspection PyMethodOverriding - def hybrid_forward( - self, F, past_target: Tensor, future_target: Tensor - ) -> Tensor: - """ - Parameters - ---------- - F: mx.symbol or mx.ndarray - Gluon function space - past_target: Tensor - FIXME - future_target: Tensor - shape (num_ts, encoder_length, 1) FIXME - - Returns - ------- - loss with shape (FIXME, FIXME) - """ - - feat_static_real = F.zeros(shape=(1,)) - past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) - - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real - ) - - dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real - ) - - dec_output = self.decoder(dec_input_dynamic, dec_input_static) - dec_dist_output = self.quantile_proj(dec_output) - - loss = self.loss(future_target, dec_dist_output) - return loss.mean(axis=1) - - -class ForkingSeq2SeqTargetPredictionNetwork(ForkingSeq2SeqNetworkBase): - # noinspection PyMethodOverriding - def hybrid_forward(self, F, past_target: Tensor) -> Tensor: - """ - Parameters - ---------- - F: mx.symbol or mx.ndarray - Gluon function space - past_target: Tensor - FIXME - - Returns - ------- - prediction tensor with shape (FIXME, FIXME) - """ - - # FIXME: can we factor out a common prefix in the base network? - feat_static_real = F.zeros(shape=(1,)) - past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) + print("TOTALLY FINE SO FAR 6") enc_output_static, enc_output_dynamic = self.encoder( past_target, feat_static_real, past_feat_dynamic_real ) + # TODO: figure out WHY IS THIS NEEDED HERE? enc_output_static = ( F.zeros(shape=(1,)) if enc_output_static is None else enc_output_static ) + print("TOTALLY FINE SO FAR 7") + dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, enc_output_dynamic, future_feat_dynamic_real, ) dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 96cd710791..34f8136352 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -81,17 +81,13 @@ def __init__( [d > 0 for d in decoder_mlp_dim_seq] ), "Elements of `mlp_hidden_dimension_seq` should be > 0" - use_dynamic_feat = ( - use_feat_dynamic_real or add_age_feature or add_time_feature - ) - encoder = HierarchicalCausalConv1DEncoder( dilation_seq=dilation_seq, kernel_size_seq=kernel_size_seq, channels_seq=channels_seq, use_residual=use_residual, - use_dynamic_feat=use_dynamic_feat, - # use_static_feat=use_feat_static_cat, + use_static_feat=False, + use_dynamic_feat=True, prefix="encoder_", ) @@ -111,7 +107,7 @@ def __init__( freq=freq, prediction_length=prediction_length, context_length=context_length, - use_dynamic_feat=use_dynamic_feat, + use_feat_dynamic_real=use_feat_dynamic_real, add_time_feature=add_time_feature, add_age_feature=add_age_feature, trainer=trainer, @@ -159,6 +155,8 @@ def __init__( num_layers=1, bidirectional=True, prefix="encoder_", + use_static_feat=False, + use_dynamic_feat=True, ) decoder = ForkingMLPDecoder( diff --git a/src/gluonts/model/seq2seq/_seq2seq_estimator.py b/src/gluonts/model/seq2seq/_seq2seq_estimator.py index 2b3211f161..c50ae96bb4 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_estimator.py +++ b/src/gluonts/model/seq2seq/_seq2seq_estimator.py @@ -23,9 +23,9 @@ from gluonts.block.enc2dec import PassThroughEnc2Dec from gluonts.block.encoder import ( HierarchicalCausalConv1DEncoder, - RNNCovariateEncoder, MLPEncoder, Seq2SeqEncoder, + RNNEncoder, ) from gluonts.block.feature import FeatureEmbedder from gluonts.block.quantile_output import QuantileOutput @@ -238,11 +238,13 @@ def __init__( trainer: Trainer = Trainer(), num_parallel_samples: int = 100, ) -> None: - encoder = RNNCovariateEncoder( + encoder = RNNEncoder( mode=encoder_rnn_model, hidden_size=encoder_rnn_num_hidden, num_layers=encoder_rnn_layer, bidirectional=encoder_rnn_bidirectional, + use_static_feat=True, + use_dynamic_feat=True, ) super(RNN2QRForecaster, self).__init__( freq=freq, diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 1943e1d0c4..bab1e878d4 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -66,9 +66,11 @@ def __init__( self.forecast_start_out = forecast_start_output_field self.decoder_series_fields = decoder_series_fields + # TODO: make use of these def _past(self, col_name): return f"past_{col_name}" + # TODO: make use of these def _future(self, col_name): return f"future_{col_name}" @@ -140,11 +142,13 @@ def flatmap_transform( out[self._future(ts_field)] = forking_dec_field + # So far pad indicator not in use pad_indicator = np.zeros(self.enc_len) pad_length = max(0, self.enc_len - sampling_idx) pad_indicator[:pad_length] = True out[f"past_{self.is_pad_out}"] = pad_indicator + # So far pad forecast_start_out not in use out[self.forecast_start_out] = shift_timestamp( out[self.start_in], sampling_idx ) diff --git a/src/gluonts/transform/field.py b/src/gluonts/transform/field.py index 860204c00b..2e39320c22 100644 --- a/src/gluonts/transform/field.py +++ b/src/gluonts/transform/field.py @@ -39,11 +39,11 @@ def __init__(self, mapping: Dict[str, str]) -> None: def transform(self, data: DataEntry): for key, new_key in self.mapping.items(): - if key not in data: - continue - assert new_key not in data - data[new_key] = data[key] - del data[key] + if key in data: + # no implicit overriding + assert new_key not in data + data[new_key] = data[key] + del data[key] return data diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 4502cf5969..385020c41f 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -69,6 +69,7 @@ def test_mqcnn_covariate_smoke_test( "use_feat_dynamic_real": use_feat_dynamic_real, "add_time_feature": add_time_feature, "add_age_feature": add_age_feature, + "hybridize": True, } dataset_train, dataset_test = make_dummy_datasets_with_features( From ae683df9b2b885c3800a9c32f64d35ac5dde5fad Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 17 Apr 2020 23:27:42 +0200 Subject: [PATCH 19/44] Removed print --- src/gluonts/model/seq2seq/_forking_estimator.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 9cad2d139f..5feb9fd190 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -230,11 +230,6 @@ def create_transformation(self) -> Transformation: {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} ) ) - else: - print( - "IM NAUGHTY?: ", - FieldName.FEAT_DYNAMIC_REAL in dynamic_feat_fields, - ) chain.append( # because of how the forking decoder works, every time step From bd733d84e1d1f428472843d57fd204da107b7e08 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Sat, 18 Apr 2020 00:15:30 +0200 Subject: [PATCH 20/44] Ensuring backward compatibility, some refactoring. --- src/gluonts/block/encoder.py | 65 +++++++-------- .../model/seq2seq/_forking_estimator.py | 19 ----- src/gluonts/model/seq2seq/_forking_network.py | 81 +++++++------------ src/gluonts/model/seq2seq/_transform.py | 4 +- 4 files changed, 57 insertions(+), 112 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index f3f7f23ce2..a1ed3dd484 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -43,30 +43,24 @@ def hybrid_forward( """ Parameters ---------- - F: A module that can either refer to the Symbol API or the NDArray API in MXNet. - target target time series, shape (batch_size, sequence_length) - static_features static features, shape (batch_size, num_static_features) - dynamic_features dynamic_features, shape (batch_size, sequence_length, num_dynamic_features) - Returns ------- Tensor static code, shape (batch_size, num_static_features) - Tensor dynamic code, shape (batch_size, sequence_length, num_dynamic_features) @@ -89,15 +83,12 @@ def _assemble_inputs( F A module that can either refer to the Symbol API or the NDArray API in MXNet. - target target time series, shape (batch_size, sequence_length, 1) - static_features static features, shape (batch_size, num_static_features) - dynamic_features dynamic_features, shape (batch_size, sequence_length, num_dynamic_features) @@ -108,7 +99,6 @@ def _assemble_inputs( combined features, shape (batch_size, sequence_length, num_static_features + num_dynamic_features + 1) - """ helper_ones = F.ones_like(target) # Ones of (N, T, 1) @@ -133,18 +123,16 @@ class HierarchicalCausalConv1DEncoder(Seq2SeqEncoder): ---------- dilation_seq dilation for each convolution in the stack. - kernel_size_seq kernel size for each convolution in the stack. - channels_seq number of channels for each convolution in the stack. - use_residual flag to toggle using residual connections. - - use_covariates - flag to toggle whether to use coveriates as input to the encoder + use_static_feat + flag to toggle whether to use use_static_feat as input to the encoder + use_dynamic_feat + flag to toggle whether to use use_static_feat as input to the encoder """ @validated() @@ -196,29 +184,23 @@ def hybrid_forward( """ Parameters ---------- - F A module that can either refer to the Symbol API or the NDArray API in MXNet. - target target time series, shape (batch_size, sequence_length, 1) - static_features static features, shape (batch_size, num_static_features) - dynamic_features dynamic_features, shape (batch_size, sequence_length, num_dynamic_features) - Returns ------- Tensor static code, shape (batch_size, num_static_features) - Tensor dynamic code, shape (batch_size, sequence_length, num_dynamic_features) @@ -236,8 +218,6 @@ def hybrid_forward( else: inputs = target - print("Been here done that.") - # NTC -> NCT (or NCW) ct = inputs.swapaxes(1, 2) ct = self.cnn(ct) @@ -251,8 +231,6 @@ def hybrid_forward( static_code = F.slice_axis(ct, axis=1, begin=-1, end=None) static_code = F.squeeze(static_code, axis=1) - print("Been here done that. 2.") - return static_code, ct @@ -265,15 +243,16 @@ class RNNEncoder(Seq2SeqEncoder): mode type of the RNN. Can be either: rnn_relu (RNN with relu activation), rnn_tanh, (RNN with tanh activation), lstm or gru. - hidden_size number of units per hidden layer. - num_layers number of hidden layers. - bidirectional toggle use of bi-directional RNN as encoder. + use_static_feat + flag to toggle whether to use use_static_feat as input to the encoder + use_dynamic_feat + flag to toggle whether to use use_static_feat as input to the encoder """ @validated() @@ -291,6 +270,7 @@ def __init__( assert hidden_size > 0, "`hidden_size` value must be greater than zero" super().__init__(**kwargs) + self.mode = mode self.hidden_size = hidden_size self.num_layers = num_layers @@ -314,15 +294,12 @@ def hybrid_forward( F A module that can either refer to the Symbol API or the NDArray API in MXNet. - target target time series, shape (batch_size, sequence_length, 1) - static_features static features, shape (batch_size, num_static_features) - dynamic_features dynamic_features, shape (batch_size, sequence_length, num_dynamic_features) @@ -332,7 +309,6 @@ def hybrid_forward( Tensor static code, shape (batch_size, num_static_features) - Tensor dynamic code, shape (batch_size, sequence_length, num_dynamic_features) @@ -383,15 +359,12 @@ def hybrid_forward( F A module that can either refer to the Symbol API or the NDArray API in MXNet. - target target time series, shape (batch_size, sequence_length) - static_features static features, shape (batch_size, num_static_features) - dynamic_features dynamic_features, shape (batch_size, sequence_length, num_dynamic_features) @@ -401,7 +374,6 @@ def hybrid_forward( Tensor static code, shape (batch_size, num_static_features) - Tensor dynamic code, shape (batch_size, sequence_length, num_dynamic_features) @@ -413,3 +385,22 @@ def hybrid_forward( static_code = self.model(inputs) dynamic_code = F.zeros_like(target).expand_dims(2) return static_code, dynamic_code + + +class RNNCovariateEncoder(RNNEncoder): + """ + Deprecated class only for compatibility; use RNNEncoder instead. + """ + + @validated() + def __init__( + self, + use_static_feat: bool = True, + use_dynamic_feat: bool = True, + **kwargs, + ) -> None: + super().__init__( + use_static_feat=use_static_feat, + use_dynamic_feat=use_dynamic_feat, + **kwargs, + ) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 5feb9fd190..11625e9d37 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -149,20 +149,7 @@ def __init__( # else [min(50, (cat + 1) // 2) for cat in self.cardinality] # ) - # TODO: refactor this variable name: dynamic_network, in fact it - # is not even necessary as is, because this is how use_dynamic_feat was - # set in MQCNNEstimator and otherwise its not used, i.e. False - # # is target only network or not? - # self.use_dynamic_real = ( - # use_dynamic_feat or add_time_feature or add_age_feature or True # TODO: fix this - # ) - # - # print(f"use_dynamic_network: {self.use_dynamic_real}") - def create_transformation(self) -> Transformation: - # if not self.use_feat_dynamic_real: - # remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL) - chain = [] dynamic_feat_fields = [] @@ -188,12 +175,9 @@ def create_transformation(self) -> Transformation: ) dynamic_feat_fields.append(FieldName.FEAT_AGE) - # TODO: there may have been a bug here if self.use_feat_dynamic_real: - print("NO IM HERE") dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) else: - print("IM HERE") chain.append( RemoveFields(field_names=[FieldName.FEAT_DYNAMIC_REAL]) ) @@ -224,7 +208,6 @@ def create_transformation(self) -> Transformation: len(dynamic_feat_fields) == 1 and FieldName.FEAT_DYNAMIC_REAL not in dynamic_feat_fields ): - print("ONLY HAVE DYNAMIC REAL") chain.append( RenameFields( {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} @@ -266,8 +249,6 @@ def create_predictor( for quantile in self.quantile_output.quantiles ] - print("TOTALLY FINE THUS FAR P1") - prediction_network = ForkingSeq2SeqPredictionNetwork( encoder=trained_network.encoder, enc2dec=trained_network.enc2dec, diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index f8eb188c7d..95d1cba6df 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -67,6 +67,28 @@ def __init__( self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() + # this method connects the sub-networks and returns the decoder output + def get_decoder_network_output( + self, F, past_target: Tensor, past_feat_dynamic_real: Tensor + ) -> Tensor: + feat_static_real = F.zeros(shape=(1,)) + future_feat_dynamic_real = F.zeros(shape=(1,)) + + # arguments: target, static_features, dynamic_features + enc_output_static, enc_output_dynamic = self.encoder( + past_target, feat_static_real, past_feat_dynamic_real + ) + + # arguments: encoder_output_static, encoder_output_dynamic, future_features + dec_input_static, dec_input_dynamic, _ = self.enc2dec( + enc_output_static, enc_output_dynamic, future_feat_dynamic_real + ) + + # arguments: dynamic_input, static_input + dec_output = self.decoder(dec_input_dynamic, dec_input_static) + + return dec_output + # TODO: figure out whether we need 2 classes each, in fact we would need 4 each, # if adding categorical with this technique, does not seem reasonable @@ -93,37 +115,13 @@ def hybrid_forward( ------- loss with shape (FIXME, FIXME) """ - - print("TOTALLY FINE SO FAR") - - feat_static_real = F.zeros(shape=(1,)) - # TODO: Required to be commented out for shape inference... - # if not self.use_dynamic_feat: - # past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) - - # arguments: target, static_features, dynamic_features - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real - ) - - print("TOTALLY FINE SO FAR 2") - - # arguments: encoder_output_static, encoder_output_dynamic, future_features - # TODO: figure out how future_features is supposed to be used: since no distinction - # between dynamic and static anymore (shape is (N, T, C) suggesting dynamic feature) - dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + dec_output = self.get_decoder_network_output( + F, past_target, past_feat_dynamic_real ) - dec_output = self.decoder(dec_input_dynamic, dec_input_static) dec_dist_output = self.quantile_proj(dec_output) - - print("TOTALLY FINE SO FAR 3") - loss = self.loss(future_target, dec_dist_output) - print("TOTALLY FINE SO FAR 4") return loss.mean(axis=1) @@ -144,37 +142,12 @@ def hybrid_forward( ------- prediction tensor with shape (FIXME, FIXME) """ - - print("TOTALLY FINE SO FAR 5") - - feat_static_real = F.zeros(shape=(1,)) - # TODO: Required to be commented out for shape inference... - # if not self.use_dynamic_feat: - # past_feat_dynamic_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) - - print("TOTALLY FINE SO FAR 6") - - enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real - ) - - # TODO: figure out WHY IS THIS NEEDED HERE? - enc_output_static = ( - F.zeros(shape=(1,)) - if enc_output_static is None - else enc_output_static - ) - - print("TOTALLY FINE SO FAR 7") - - dec_inp_static, dec_inp_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real, + dec_output = self.get_decoder_network_output( + F, past_target, past_feat_dynamic_real ) - dec_output = self.decoder(dec_inp_dynamic, dec_inp_static) fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) fcst_output = F.squeeze(fcst_output, axis=1) - predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) + return predictions diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index bab1e878d4..aede54faa5 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -121,7 +121,7 @@ def flatmap_transform( # if we have less than enc_len values, pad_left with 0 past_piece = pad_to_size(slice, self.enc_len) - out[f"past_{ts_field}"] = past_piece.transpose() + out[self._past(ts_field)] = past_piece.transpose() # in prediction mode, don't provide decode-values if not is_train and ts_field == self.target_in: @@ -146,7 +146,7 @@ def flatmap_transform( pad_indicator = np.zeros(self.enc_len) pad_length = max(0, self.enc_len - sampling_idx) pad_indicator[:pad_length] = True - out[f"past_{self.is_pad_out}"] = pad_indicator + out[self._past(self.is_pad_out)] = pad_indicator # So far pad forecast_start_out not in use out[self.forecast_start_out] = shift_timestamp( From b17c66309ffebc1b7a4b2180e8ea6561203173e7 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Sat, 18 Apr 2020 00:55:55 +0200 Subject: [PATCH 21/44] Mainly argument refactoring, but also some legibility refactoring. --- src/gluonts/block/encoder.py | 4 +- src/gluonts/dataset/field_names.py | 2 +- .../model/seq2seq/_forking_estimator.py | 28 +++-- src/gluonts/model/seq2seq/_forking_network.py | 16 +-- .../model/seq2seq/_mq_dnn_estimator.py | 110 ++++++++++++------ 5 files changed, 94 insertions(+), 66 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index a1ed3dd484..76e1cd7fa9 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -132,7 +132,7 @@ class HierarchicalCausalConv1DEncoder(Seq2SeqEncoder): use_static_feat flag to toggle whether to use use_static_feat as input to the encoder use_dynamic_feat - flag to toggle whether to use use_static_feat as input to the encoder + flag to toggle whether to use use_dynamic_feat as input to the encoder """ @validated() @@ -252,7 +252,7 @@ class RNNEncoder(Seq2SeqEncoder): use_static_feat flag to toggle whether to use use_static_feat as input to the encoder use_dynamic_feat - flag to toggle whether to use use_static_feat as input to the encoder + flag to toggle whether to use use_dynamic_feat as input to the encoder """ @validated() diff --git a/src/gluonts/dataset/field_names.py b/src/gluonts/dataset/field_names.py index 342c397a17..0e0a6ff7f8 100644 --- a/src/gluonts/dataset/field_names.py +++ b/src/gluonts/dataset/field_names.py @@ -28,7 +28,7 @@ class FieldName: FEAT_DYNAMIC_CAT = "feat_dynamic_cat" FEAT_DYNAMIC_REAL = "feat_dynamic_real" - # TODO: maybe add FEAT_DYNAMIC = "feat_dynamic" + FEAT_DYNAMIC = "feat_dynamic" FEAT_TIME = "time_feat" FEAT_CONST = "feat_dynamic_const" diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 11625e9d37..3fb9a422e3 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -84,6 +84,13 @@ class ForkingSeq2SeqEstimator(GluonEstimator): frequency of the time series prediction_length length of the decoding sequence + use_feat_dynamic_real + Whether to use the ``feat_dynamic_real`` field from the data (default: False) + add_time_feature + Adds a set of time features. + add_age_feature + Adds an age feature. + The age feature starts with a small value at the start of the time series and grows over time. context_length length of the encoding sequence (prediction_length is used if None) trainer @@ -98,10 +105,10 @@ def __init__( quantile_output: QuantileOutput, freq: str, prediction_length: int, + context_length: Optional[int] = None, use_feat_dynamic_real: bool = False, add_time_feature: bool = False, add_age_feature: bool = False, - context_length: Optional[int] = None, trainer: Trainer = Trainer(), ) -> None: super().__init__(trainer=trainer) @@ -194,24 +201,17 @@ def create_transformation(self) -> Transformation: ) dynamic_feat_fields.append(FieldName.FEAT_CONST) - # now we map all the dynamic input onto FieldName.FEAT_DYNAMIC_REAL - # TODO: change the field from FieldName.FEAT_DYNAMIC_REAL to FieldName.FEAT_TIME for consistency with deepAR - # or to FieldName.FEAT_DYNAMIC, which would have to be added + # now we map all the dynamic input onto FieldName.FEAT_DYNAMIC if len(dynamic_feat_fields) > 1: chain.append( VstackFeatures( - output_field=FieldName.FEAT_DYNAMIC_REAL, + output_field=FieldName.FEAT_DYNAMIC, input_fields=dynamic_feat_fields, ) ) - elif ( - len(dynamic_feat_fields) == 1 - and FieldName.FEAT_DYNAMIC_REAL not in dynamic_feat_fields - ): + elif len(dynamic_feat_fields) == 1: chain.append( - RenameFields( - {dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC_REAL} - ) + RenameFields({dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC}) ) chain.append( @@ -222,7 +222,7 @@ def create_transformation(self) -> Transformation: enc_len=self.context_length, dec_len=self.prediction_length, encoder_series_fields=[ - FieldName.FEAT_DYNAMIC_REAL + FieldName.FEAT_DYNAMIC ], # TODO: later add categorical too ), ) @@ -235,7 +235,6 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, - use_dynamic_feat=self.use_dynamic_feat, ) def create_predictor( @@ -254,7 +253,6 @@ def create_predictor( enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, - use_dynamic_feat=trained_network.use_dynamic_feat, ) copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 95d1cba6df..2d9f9f5721 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -50,8 +50,6 @@ def __init__( enc2dec: Seq2SeqEnc2Dec, decoder: Seq2SeqDecoder, quantile_output: QuantileOutput, - use_dynamic_feat: bool, - # use_static_feat: bool, **kwargs, ) -> None: super().__init__(**kwargs) @@ -60,8 +58,6 @@ def __init__( self.enc2dec = enc2dec self.decoder = decoder self.quantile_output = quantile_output - self.use_dynamic_feat = use_dynamic_feat - # self.use_static_feat = use_static_feat with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() @@ -69,14 +65,14 @@ def __init__( # this method connects the sub-networks and returns the decoder output def get_decoder_network_output( - self, F, past_target: Tensor, past_feat_dynamic_real: Tensor + self, F, past_target: Tensor, past_feat_dynamic: Tensor ) -> Tensor: feat_static_real = F.zeros(shape=(1,)) future_feat_dynamic_real = F.zeros(shape=(1,)) # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_real + past_target, feat_static_real, past_feat_dynamic ) # arguments: encoder_output_static, encoder_output_dynamic, future_features @@ -98,7 +94,7 @@ def hybrid_forward( self, F, past_target: Tensor, - past_feat_dynamic_real: Tensor, + past_feat_dynamic: Tensor, future_target: Tensor, ) -> Tensor: """ @@ -116,7 +112,7 @@ def hybrid_forward( loss with shape (FIXME, FIXME) """ dec_output = self.get_decoder_network_output( - F, past_target, past_feat_dynamic_real + F, past_target, past_feat_dynamic ) dec_dist_output = self.quantile_proj(dec_output) @@ -128,7 +124,7 @@ def hybrid_forward( class ForkingSeq2SeqPredictionNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( - self, F, past_target: Tensor, past_feat_dynamic_real: Tensor + self, F, past_target: Tensor, past_feat_dynamic: Tensor ) -> Tensor: """ Parameters @@ -143,7 +139,7 @@ def hybrid_forward( prediction tensor with shape (FIXME, FIXME) """ dec_output = self.get_decoder_network_output( - F, past_target, past_feat_dynamic_real + F, past_target, past_feat_dynamic ) fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 34f8136352..2857fed8e7 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -28,12 +28,6 @@ from gluonts.model.seq2seq._forking_estimator import ForkingSeq2SeqEstimator -# TODO: in general, it seems unnecessary to put the MQCNN and MQRNN into Seq2Seq since their commonality in code with -# the rest is just the abstract classes Seq2SeqDecoder and Se2SeqEncoder, -# and the Estimator is not based on Seq2SeqEstimator! - - -# TODO: integrate MQDNN, change arguments to non mutable class MQCNNEstimator(ForkingSeq2SeqEstimator): """ An :class:`MQDNNEstimator` with a Convolutional Neural Network (CNN) as an @@ -53,38 +47,69 @@ def __init__( add_time_feature: bool = False, add_age_feature: bool = False, seed: Optional[int] = None, - decoder_mlp_dim_seq: List[int] = [20], - channels_seq: List[int] = [30, 30, 30], - dilation_seq: List[int] = [1, 3, 9], - kernel_size_seq: List[int] = [3, 3, 3], + decoder_mlp_dim_seq: Optional[List[int]] = None, + channels_seq: Optional[List[int]] = None, + dilation_seq: Optional[List[int]] = None, + kernel_size_seq: Optional[List[int]] = None, use_residual: bool = True, - quantiles: List[float] = list( - [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] - ), + quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), ) -> None: - if seed: - np.random.seed(seed) - mx.random.seed(seed) - - assert ( - len(channels_seq) == len(dilation_seq) == len(kernel_size_seq) - ), ( - f"mismatch CNN configurations: {len(channels_seq)} vs. " - f"{len(dilation_seq)} vs. {len(kernel_size_seq)}" - ) assert ( prediction_length > 0 ), f"Invalid prediction length: {prediction_length}." - assert all( - [d > 0 for d in decoder_mlp_dim_seq] + assert decoder_mlp_dim_seq is None or all( + d > 0 for d in decoder_mlp_dim_seq ), "Elements of `mlp_hidden_dimension_seq` should be > 0" + assert channels_seq is None or all( + [d > 0 for d in channels_seq] + ), "Elements of `channels_seq` should be > 0" + assert dilation_seq is None or all( + [d > 0 for d in dilation_seq] + ), "Elements of `dilation_seq` should be > 0" + assert kernel_size_seq is None or all( + [d > 0 for d in kernel_size_seq] + ), "Elements of `kernel_size_seq` should be > 0" + assert quantiles is None or all( + [0 <= d <= 1 for d in quantiles] + ), "Elements of `quantiles` should be >= 0 and <= 1" + + self.decoder_mlp_dim_seq = ( + decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [20] + ) + self.channels_seq = ( + channels_seq if channels_seq is not None else [30, 30, 30] + ) + self.dilation_seq = ( + dilation_seq if dilation_seq is not None else [1, 3, 9] + ) + self.kernel_size_seq = ( + kernel_size_seq if kernel_size_seq is not None else [3, 3, 3] + ) + self.quantiles = ( + quantiles + if quantiles is not None + else [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + ) + + assert ( + len(self.channels_seq) + == len(self.dilation_seq) + == len(self.kernel_size_seq) + ), ( + f"mismatch CNN configurations: {len(self.channels_seq)} vs. " + f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}" + ) + + if seed: + np.random.seed(seed) + mx.random.seed(seed) encoder = HierarchicalCausalConv1DEncoder( - dilation_seq=dilation_seq, - kernel_size_seq=kernel_size_seq, - channels_seq=channels_seq, + dilation_seq=self.dilation_seq, + kernel_size_seq=self.kernel_size_seq, + channels_seq=self.channels_seq, use_residual=use_residual, use_static_feat=False, use_dynamic_feat=True, @@ -93,12 +118,12 @@ def __init__( decoder = ForkingMLPDecoder( dec_len=prediction_length, - final_dim=decoder_mlp_dim_seq[-1], - hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], + final_dim=self.decoder_mlp_dim_seq[-1], + hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1], prefix="decoder_", ) - quantile_output = QuantileOutput(quantiles) + quantile_output = QuantileOutput(self.quantiles) super().__init__( encoder=encoder, @@ -124,7 +149,6 @@ def derive_auto_fields(cls, train_iter): } -# TODO: integrate MQDNN, change arguments to non mutable class MQRNNEstimator(ForkingSeq2SeqEstimator): """ An :class:`MQDNNEstimator` with a Recurrent Neural Network (RNN) as an @@ -137,17 +161,27 @@ def __init__( prediction_length: int, freq: str, context_length: Optional[int] = None, - decoder_mlp_dim_seq: List[int] = [20], + decoder_mlp_dim_seq: List[int] = None, trainer: Trainer = Trainer(), - quantiles: List[float] = list([0.1, 0.5, 0.9]), + quantiles: List[float] = None, ) -> None: assert ( prediction_length > 0 ), f"Invalid prediction length: {prediction_length}." - assert all( + assert decoder_mlp_dim_seq is None or all( [d > 0 for d in decoder_mlp_dim_seq] ), "Elements of `mlp_hidden_dimension_seq` should be > 0" + assert quantiles is None or all( + [0 <= d <= 1 for d in quantiles] + ), "Elements of `quantiles` should be >= 0 and <= 1" + + self.decoder_mlp_dim_seq = ( + decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [20] + ) + self.quantiles = ( + quantiles if quantiles is not None else [0.1, 0.5, 0.9] + ) encoder = RNNEncoder( mode="gru", @@ -161,12 +195,12 @@ def __init__( decoder = ForkingMLPDecoder( dec_len=prediction_length, - final_dim=decoder_mlp_dim_seq[-1], - hidden_dimension_sequence=decoder_mlp_dim_seq[:-1], + final_dim=self.decoder_mlp_dim_seq[-1], + hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1], prefix="decoder_", ) - quantile_output = QuantileOutput(quantiles) + quantile_output = QuantileOutput(self.quantiles) super().__init__( encoder=encoder, From 62395f6578752c5a15e26ae376eddeb8a7969811 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 20 Apr 2020 20:02:53 +0200 Subject: [PATCH 22/44] Added use_feat_static_cat support and observed_values support. --- .../model/seq2seq/_forking_estimator.py | 132 +++++++++++++---- src/gluonts/model/seq2seq/_forking_network.py | 135 +++++++++++++++--- .../model/seq2seq/_mq_dnn_estimator.py | 18 ++- src/gluonts/model/seq2seq/_transform.py | 36 +++-- test/model/seq2seq/test_model.py | 9 +- 5 files changed, 260 insertions(+), 70 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 3fb9a422e3..effadd2e44 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -12,14 +12,17 @@ # permissions and limitations under the License. # Standard library imports -from typing import Optional +from typing import Optional, List + +# Third-party imports +import numpy as np # First-party imports from gluonts.block.decoder import Seq2SeqDecoder from gluonts.block.enc2dec import PassThroughEnc2Dec from gluonts.block.encoder import Seq2SeqEncoder from gluonts.block.quantile_output import QuantileOutput -from gluonts.core.component import validated +from gluonts.core.component import validated, DType from gluonts.dataset.field_names import FieldName from gluonts.model.estimator import GluonEstimator from gluonts.model.forecast import Quantile @@ -38,6 +41,9 @@ RenameFields, AddConstFeature, RemoveFields, + AsNumpyArray, + AddObservedValuesIndicator, + SetField, ) # Relative imports @@ -84,17 +90,29 @@ class ForkingSeq2SeqEstimator(GluonEstimator): frequency of the time series prediction_length length of the decoding sequence + context_length + length of the encoding sequence (prediction_length is used if None) use_feat_dynamic_real Whether to use the ``feat_dynamic_real`` field from the data (default: False) + use_feat_static_cat: + Whether to use the ``feat_static_cat`` field from the data (default: False) + cardinality: List[int] = None, + Number of values of each categorical feature. + This must be set if ``use_feat_static_cat == True`` (default: None) + embedding_dimension: List[int] = None, + Dimension of the embeddings for categorical features + (default: [min(50, (cat+1)//2) for cat in cardinality]) add_time_feature Adds a set of time features. add_age_feature Adds an age feature. The age feature starts with a small value at the start of the time series and grows over time. - context_length - length of the encoding sequence (prediction_length is used if None) trainer - trainer + trainer (default: Trainer()) + dummy_value + Value to use for replacing missing values (default: 0.0) + dtype + (default: np.float32) """ @validated() @@ -107,9 +125,14 @@ def __init__( prediction_length: int, context_length: Optional[int] = None, use_feat_dynamic_real: bool = False, + use_feat_static_cat: bool = False, + cardinality: List[int] = None, + embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, trainer: Trainer = Trainer(), + dummy_value: float = 0.0, + dtype: DType = np.float32, ) -> None: super().__init__(trainer=trainer) @@ -119,15 +142,15 @@ def __init__( assert ( prediction_length > 0 ), "The value of `prediction_length` should be > 0" - # assert (cardinality and use_feat_static_cat) or ( - # not (cardinality or use_feat_static_cat) - # ), "You should set `cardinality` if and only if `use_feat_static_cat=True`" - # assert cardinality is None or all( - # [c > 0 for c in cardinality] - # ), "Elements of `cardinality` should be > 0" - # assert embedding_dimension is None or all( - # [e > 0 for e in embedding_dimension] - # ), "Elements of `embedding_dimension` should be > 0" + assert (cardinality and use_feat_static_cat) or ( + not (cardinality or use_feat_static_cat) + ), "You should set `cardinality` if and only if `use_feat_static_cat=True`" + assert cardinality is None or all( + [c > 0 for c in cardinality] + ), "Elements of `cardinality` should be > 0" + assert embedding_dimension is None or all( + [e > 0 for e in embedding_dimension] + ), "Elements of `embedding_dimension` should be > 0" self.encoder = encoder self.decoder = decoder @@ -140,25 +163,53 @@ def __init__( else self.prediction_length ) self.use_feat_dynamic_real = use_feat_dynamic_real + self.use_feat_static_cat = use_feat_static_cat + self.cardinality = ( + cardinality if cardinality and use_feat_static_cat else [1] + ) + self.embedding_dimension = ( + embedding_dimension + if embedding_dimension is not None + else [min(50, (cat + 1) // 2) for cat in self.cardinality] + ) self.add_time_feature = add_time_feature self.add_age_feature = add_age_feature self.use_dynamic_feat = ( use_feat_dynamic_real or add_age_feature or add_time_feature ) - # self.use_feat_static_cat = use_feat_static_cat - # self.cardinality = ( - # cardinality if cardinality and use_feat_static_cat else [1] - # ) - # self.embedding_dimension = ( - # embedding_dimension - # if embedding_dimension is not None - # else [min(50, (cat + 1) // 2) for cat in self.cardinality] - # ) + self.dummy_value = dummy_value + self.dtype = dtype def create_transformation(self) -> Transformation: chain = [] dynamic_feat_fields = [] + remove_field_names = [FieldName.FEAT_DYNAMIC_CAT] + + # --- GENERAL TRANSFORMATION CHAIN --- + + # determine unused input + if not self.use_feat_dynamic_real: + remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL) + if not self.use_feat_static_cat: + remove_field_names.append(FieldName.FEAT_STATIC_CAT) + + chain.extend( + [ + RemoveFields(field_names=remove_field_names), + AsNumpyArray( + field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype + ), + AddObservedValuesIndicator( + target_field=FieldName.TARGET, + output_field=FieldName.OBSERVED_VALUES, + dummy_value=self.dummy_value, + dtype=self.dtype, + ), + ] + ) + + # --- TRANSFORMATION CHAIN FOR DYNAMIC FEATURES --- if self.add_time_feature: chain.append( @@ -178,16 +229,13 @@ def create_transformation(self) -> Transformation: target_field=FieldName.TARGET, output_field=FieldName.FEAT_AGE, pred_length=self.prediction_length, + dtype=self.dtype, ), ) dynamic_feat_fields.append(FieldName.FEAT_AGE) if self.use_feat_dynamic_real: dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) - else: - chain.append( - RemoveFields(field_names=[FieldName.FEAT_DYNAMIC_REAL]) - ) # we need to make sure that there is always some dynamic input # we will however disregard it in the hybrid forward @@ -197,6 +245,7 @@ def create_transformation(self) -> Transformation: target_field=FieldName.TARGET, output_field=FieldName.FEAT_CONST, pred_length=self.prediction_length, + dtype=self.dtype, ), ) dynamic_feat_fields.append(FieldName.FEAT_CONST) @@ -214,6 +263,22 @@ def create_transformation(self) -> Transformation: RenameFields({dynamic_feat_fields[0]: FieldName.FEAT_DYNAMIC}) ) + # --- TRANSFORMATION CHAIN FOR STATIC FEATURES --- + + if not self.use_feat_static_cat: + chain.append( + SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0]), + ) + chain.append( + AsNumpyArray( + field=FieldName.FEAT_STATIC_CAT, + expected_ndim=1, + dtype=self.dtype, + ), + ) + + # --- SAMPLE AND CUT THE TIME-SERIES --- + chain.append( # because of how the forking decoder works, every time step # in context is used for splitting, which is why we use the TestSplitSampler @@ -222,8 +287,9 @@ def create_transformation(self) -> Transformation: enc_len=self.context_length, dec_len=self.prediction_length, encoder_series_fields=[ - FieldName.FEAT_DYNAMIC - ], # TODO: later add categorical too + FieldName.FEAT_DYNAMIC, + FieldName.OBSERVED_VALUES, + ], ), ) @@ -235,6 +301,10 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: enc2dec=PassThroughEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, + context_length=self.context_length, + cardinality=self.cardinality, + embedding_dimension=self.embedding_dimension, + dtype=self.dtype, ) def create_predictor( @@ -253,6 +323,10 @@ def create_predictor( enc2dec=trained_network.enc2dec, decoder=trained_network.decoder, quantile_output=trained_network.quantile_output, + context_length=self.context_length, + cardinality=self.cardinality, + embedding_dimension=self.embedding_dimension, + dtype=self.dtype, ) copy_parameters(trained_network, prediction_network) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 2d9f9f5721..651f894497 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -11,10 +11,13 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. +# Third-party imports +from typing import List + # Third-party imports import mxnet as mx from mxnet import gluon - +import numpy as np # First-party imports from gluonts.block.decoder import Seq2SeqDecoder @@ -23,6 +26,10 @@ from gluonts.block.quantile_output import QuantileOutput from gluonts.core.component import validated from gluonts.model.common import Tensor +from gluonts.block.feature import FeatureEmbedder +from gluonts.block.scaler import MeanScaler, NOPScaler +from gluonts.core.component import DType +from gluonts.support.util import weighted_average class ForkingSeq2SeqNetworkBase(gluon.HybridBlock): @@ -39,6 +46,14 @@ class ForkingSeq2SeqNetworkBase(gluon.HybridBlock): decoder block quantile_output: QuantileOutput quantile output block + context_length: int, + length of the encoding sequence + cardinality: List[int], + number of values of each categorical feature. + embedding_dimension: List[int], + dimension of the embeddings for categorical features + dtype + (default: np.float32) kwargs: dict dictionary of Gluon HybridBlock parameters """ @@ -50,6 +65,10 @@ def __init__( enc2dec: Seq2SeqEnc2Dec, decoder: Seq2SeqDecoder, quantile_output: QuantileOutput, + context_length: int, + cardinality: List[int], + embedding_dimension: List[int], + dtype: DType = np.float32, **kwargs, ) -> None: super().__init__(**kwargs) @@ -58,26 +77,71 @@ def __init__( self.enc2dec = enc2dec self.decoder = decoder self.quantile_output = quantile_output + self.context_length = context_length + self.cardinality = cardinality + self.embedding_dimension = embedding_dimension + self.dtype = dtype + + # TODO: implement scaling + scaling = False + if scaling: + self.scaler = MeanScaler(keepdims=True) + else: + self.scaler = NOPScaler(keepdims=True) with self.name_scope(): self.quantile_proj = quantile_output.get_quantile_proj() self.loss = quantile_output.get_loss() + self.embedder = FeatureEmbedder( + cardinalities=cardinality, + embedding_dims=embedding_dimension, + dtype=self.dtype, + ) # this method connects the sub-networks and returns the decoder output def get_decoder_network_output( - self, F, past_target: Tensor, past_feat_dynamic: Tensor + self, + F, + past_target: Tensor, + past_feat_dynamic: Tensor, + feat_static_cat: Tensor, + past_observed_values: Tensor, ) -> Tensor: - feat_static_real = F.zeros(shape=(1,)) - future_feat_dynamic_real = F.zeros(shape=(1,)) + + # scale is computed on the context length last units of the past target + # scale shape is (batch_size, 1, *target_shape) + _, scale = self.scaler( + past_target.slice_axis( + axis=1, begin=-self.context_length, end=None + ), + past_observed_values.slice_axis( + axis=1, begin=-self.context_length, end=None + ), + ) + + # (batch_size, num_features) + embedded_cat = self.embedder(feat_static_cat) + + # in addition to embedding features, use the log scale as it can help prediction too + # (batch_size, num_features + prod(target_shape)) + feat_static_real = F.concat( + embedded_cat, F.log(scale.squeeze(axis=1)), dim=1, + ) + + # Passing past_observed_values as a feature would allow the network to + # make that distinction and possibly ignore the masked values. + past_feat_dynamic_extended = F.concat( + past_feat_dynamic, past_observed_values, dim=-1 + ) # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic + past_target, feat_static_real, past_feat_dynamic_extended ) # arguments: encoder_output_static, encoder_output_dynamic, future_features dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, future_feat_dynamic_real + enc_output_static, enc_output_dynamic, F.zeros(shape=(1,)) ) # arguments: dynamic_input, static_input @@ -86,45 +150,67 @@ def get_decoder_network_output( return dec_output -# TODO: figure out whether we need 2 classes each, in fact we would need 4 each, -# if adding categorical with this technique, does not seem reasonable class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( self, F, + future_target: Tensor, past_target: Tensor, past_feat_dynamic: Tensor, - future_target: Tensor, + feat_static_cat: Tensor, + past_observed_values: Tensor, # FOR SOME REASON NOT USED??? + future_observed_values: Tensor, ) -> Tensor: """ Parameters ---------- F: mx.symbol or mx.ndarray Gluon function space - past_target: Tensor - FIXME future_target: Tensor - shape (num_ts, encoder_length, 1) FIXME + shape (batch_size, encoder_length, decoder_length) + past_target: Tensor + shape (batch_size, encoder_length, 1) + feat_static_cat + shape (batch_size, encoder_length, num_feature_static_cat) + past_feat_dynamic + shape (batch_size, encoder_length, num_feature_dynamic) + past_observed_values: Tensor + shape (batch_size, encoder_length, 1) + future_observed_values: Tensor + shape (batch_size, encoder_length, decoder_length) Returns ------- - loss with shape (FIXME, FIXME) + loss with shape (batch_size, prediction_length) """ dec_output = self.get_decoder_network_output( - F, past_target, past_feat_dynamic + F, + past_target, + past_feat_dynamic, + feat_static_cat, + past_observed_values, ) dec_dist_output = self.quantile_proj(dec_output) loss = self.loss(future_target, dec_dist_output) - return loss.mean(axis=1) + weighted_loss = weighted_average( + F=F, x=loss, weights=future_observed_values, axis=1 + ) + + return weighted_loss class ForkingSeq2SeqPredictionNetwork(ForkingSeq2SeqNetworkBase): # noinspection PyMethodOverriding def hybrid_forward( - self, F, past_target: Tensor, past_feat_dynamic: Tensor + self, + F, + past_target: Tensor, + past_feat_dynamic: Tensor, + feat_static_cat: Tensor, + past_observed_values: Tensor, ) -> Tensor: """ Parameters @@ -132,14 +218,25 @@ def hybrid_forward( F: mx.symbol or mx.ndarray Gluon function space past_target: Tensor - FIXME + shape (batch_size, encoder_length, 1) + feat_static_cat + shape (batch_size, encoder_length, num_feature_static_cat) + past_feat_dynamic + shape (batch_size, encoder_length, num_feature_dynamic) + past_observed_values: Tensor + shape (batch_size, encoder_length, 1) Returns ------- - prediction tensor with shape (FIXME, FIXME) + prediction tensor with shape (batch_size, prediction_length) """ + dec_output = self.get_decoder_network_output( - F, past_target, past_feat_dynamic + F, + past_target, + past_feat_dynamic, + feat_static_cat, + past_observed_values, ) fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 2857fed8e7..1a896988b4 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -68,8 +68,9 @@ def __init__( assert dilation_seq is None or all( [d > 0 for d in dilation_seq] ), "Elements of `dilation_seq` should be > 0" + # TODO: add support for kernel size=1 assert kernel_size_seq is None or all( - [d > 0 for d in kernel_size_seq] + [d > 1 for d in kernel_size_seq] ), "Elements of `kernel_size_seq` should be > 0" assert quantiles is None or all( [0 <= d <= 1 for d in quantiles] @@ -106,12 +107,14 @@ def __init__( np.random.seed(seed) mx.random.seed(seed) + # `use_static_feat` and `use_dynamic_feat` always True because network + # always receives input; either from the input data or constants encoder = HierarchicalCausalConv1DEncoder( dilation_seq=self.dilation_seq, kernel_size_seq=self.kernel_size_seq, channels_seq=self.channels_seq, use_residual=use_residual, - use_static_feat=False, + use_static_feat=True, use_dynamic_feat=True, prefix="encoder_", ) @@ -133,6 +136,9 @@ def __init__( prediction_length=prediction_length, context_length=context_length, use_feat_dynamic_real=use_feat_dynamic_real, + use_feat_static_cat=use_feat_static_cat, + cardinality=cardinality, + embedding_dimension=embedding_dimension, add_time_feature=add_time_feature, add_age_feature=add_age_feature, trainer=trainer, @@ -144,8 +150,8 @@ def derive_auto_fields(cls, train_iter): return { "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, - # "use_feat_static_cat": bool(stats.feat_static_cat), - # "cardinality": [len(cats) for cats in stats.feat_static_cat], + "use_feat_static_cat": bool(stats.feat_static_cat), + "cardinality": [len(cats) for cats in stats.feat_static_cat], } @@ -183,13 +189,15 @@ def __init__( quantiles if quantiles is not None else [0.1, 0.5, 0.9] ) + # `use_static_feat` and `use_dynamic_feat` always True because network + # always receives input; either from the input data or constants encoder = RNNEncoder( mode="gru", hidden_size=50, num_layers=1, bidirectional=True, prefix="encoder_", - use_static_feat=False, + use_static_feat=True, use_dynamic_feat=True, ) diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index aede54faa5..dbae874c86 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -13,7 +13,7 @@ # Standard library imports from collections import Counter -from typing import Iterator, List, Any +from typing import Iterator, List, Any, Optional # Third-party imports import numpy as np @@ -21,6 +21,7 @@ # First-party imports from gluonts.core.component import validated from gluonts.dataset.common import DataEntry +from gluonts.dataset.field_names import FieldName from gluonts.transform import FlatMapTransformation, shift_timestamp @@ -43,9 +44,10 @@ def __init__( train_sampler, enc_len: int, dec_len: int, - target_in: str = "target", - encoder_series_fields: List[str] = None, - decoder_series_fields: List[str] = [], + target_in: str = FieldName.TARGET, + observed_in: str = FieldName.OBSERVED_VALUES, + encoder_series_fields: Optional[List[str]] = None, + decoder_series_fields: Optional[List[str]] = None, is_pad_out: str = "is_pad", start_input_field: str = "start", forecast_start_output_field: str = "forecast_start", @@ -57,20 +59,21 @@ def __init__( self.train_sampler = train_sampler self.enc_len = enc_len self.dec_len = dec_len - self.ts_fields = ( - encoder_series_fields if encoder_series_fields is not None else [] - ) self.target_in = target_in + self.observed_in = observed_in self.is_pad_out = is_pad_out self.start_in = start_input_field self.forecast_start_out = forecast_start_output_field - self.decoder_series_fields = decoder_series_fields + self.ts_fields = ( + encoder_series_fields if encoder_series_fields is not None else [] + ) + self.decoder_series_fields = ( + decoder_series_fields if decoder_series_fields is not None else [] + ) - # TODO: make use of these def _past(self, col_name): return f"past_{col_name}" - # TODO: make use of these def _future(self, col_name): return f"future_{col_name}" @@ -93,7 +96,9 @@ def flatmap_transform( else: sampling_indices = [len(target)] - decoder_fields = set([self.target_in] + self.decoder_series_fields) + decoder_fields = set( + [self.target_in, self.observed_in] + self.decoder_series_fields + ) ts_fields_counter = Counter( self.ts_fields + [self.target_in] + self.decoder_series_fields @@ -124,11 +129,16 @@ def flatmap_transform( out[self._past(ts_field)] = past_piece.transpose() # in prediction mode, don't provide decode-values - if not is_train and ts_field == self.target_in: + if not is_train and ( + ts_field in [self.target_in, self.observed_in] + ): continue if ts_field in decoder_fields: - d3: Any = () if ts_field == self.target_in else (len(ts),) + d3: Any = () if ts_field in [ + self.target_in, + self.observed_in, + ] else (len(ts),) forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len) + d3 ) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 385020c41f..9da983308c 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -56,8 +56,9 @@ def test_accuracy( @pytest.mark.parametrize("use_feat_dynamic_real", [True, False]) @pytest.mark.parametrize("add_time_feature", [True, False]) @pytest.mark.parametrize("add_age_feature", [True, False]) +@pytest.mark.parametrize("hybridize", [True, False]) def test_mqcnn_covariate_smoke_test( - use_feat_dynamic_real, add_time_feature, add_age_feature + use_feat_dynamic_real, add_time_feature, add_age_feature, hybridize ): hps = { "seed": 42, @@ -69,12 +70,12 @@ def test_mqcnn_covariate_smoke_test( "use_feat_dynamic_real": use_feat_dynamic_real, "add_time_feature": add_time_feature, "add_age_feature": add_age_feature, - "hybridize": True, + "hybridize": hybridize, } dataset_train, dataset_test = make_dummy_datasets_with_features( - cardinality=[3, 10, 42], - num_feat_dynamic_real=3, + cardinality=[3, 10], + num_feat_dynamic_real=2, freq=hps["freq"], prediction_length=hps["prediction_length"], ) From 8d7b87d2848f9c7b9cb05fd8b20195a253e5e820 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 20 Apr 2020 21:57:06 +0200 Subject: [PATCH 23/44] Minor refactoring. --- .../model/seq2seq/_forking_estimator.py | 19 +++----- src/gluonts/model/seq2seq/_forking_network.py | 1 + .../model/seq2seq/_seq2seq_estimator.py | 19 ++++---- src/gluonts/model/seq2seq/_transform.py | 47 ++++++++++--------- .../seq2seq/test_forking_sequence_splitter.py | 1 - test/model/seq2seq/test_model.py | 2 +- 6 files changed, 46 insertions(+), 43 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index effadd2e44..b4a3f73de0 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -109,8 +109,6 @@ class ForkingSeq2SeqEstimator(GluonEstimator): The age feature starts with a small value at the start of the time series and grows over time. trainer trainer (default: Trainer()) - dummy_value - Value to use for replacing missing values (default: 0.0) dtype (default: np.float32) """ @@ -131,7 +129,6 @@ def __init__( add_time_feature: bool = False, add_age_feature: bool = False, trainer: Trainer = Trainer(), - dummy_value: float = 0.0, dtype: DType = np.float32, ) -> None: super().__init__(trainer=trainer) @@ -177,14 +174,15 @@ def __init__( self.use_dynamic_feat = ( use_feat_dynamic_real or add_age_feature or add_time_feature ) - - self.dummy_value = dummy_value self.dtype = dtype def create_transformation(self) -> Transformation: chain = [] dynamic_feat_fields = [] - remove_field_names = [FieldName.FEAT_DYNAMIC_CAT] + remove_field_names = [ + FieldName.FEAT_DYNAMIC_CAT, + FieldName.FEAT_STATIC_REAL, + ] # --- GENERAL TRANSFORMATION CHAIN --- @@ -203,7 +201,6 @@ def create_transformation(self) -> Transformation: AddObservedValuesIndicator( target_field=FieldName.TARGET, output_field=FieldName.OBSERVED_VALUES, - dummy_value=self.dummy_value, dtype=self.dtype, ), ] @@ -286,10 +283,8 @@ def create_transformation(self) -> Transformation: train_sampler=TestSplitSampler(), enc_len=self.context_length, dec_len=self.prediction_length, - encoder_series_fields=[ - FieldName.FEAT_DYNAMIC, - FieldName.OBSERVED_VALUES, - ], + encoder_series_fields=[FieldName.FEAT_DYNAMIC], + shared_series_fields=[FieldName.OBSERVED_VALUES], ), ) @@ -312,7 +307,7 @@ def create_predictor( transformation: Transformation, trained_network: ForkingSeq2SeqNetworkBase, ) -> Predictor: - # todo: this is specific to quantile output + # this is specific to quantile output quantile_strs = [ Quantile.from_float(quantile).name for quantile in self.quantile_output.quantiles diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 651f894497..5e783fa769 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -195,6 +195,7 @@ def hybrid_forward( dec_dist_output = self.quantile_proj(dec_output) loss = self.loss(future_target, dec_dist_output) + # mask the loss based on observed indicator weighted_loss = weighted_average( F=F, x=loss, weights=future_observed_values, axis=1 ) diff --git a/src/gluonts/model/seq2seq/_seq2seq_estimator.py b/src/gluonts/model/seq2seq/_seq2seq_estimator.py index c50ae96bb4..cf9756300b 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_estimator.py +++ b/src/gluonts/model/seq2seq/_seq2seq_estimator.py @@ -45,7 +45,6 @@ from ._seq2seq_network import Seq2SeqPredictionNetwork, Seq2SeqTrainingNetwork -# TODO: fix mutable arguments class Seq2SeqEstimator(GluonEstimator): """ Quantile-Regression Sequence-to-Sequence Estimator @@ -64,7 +63,7 @@ def __init__( decoder_mlp_static_dim: int, scaler: Scaler = NOPScaler(), context_length: Optional[int] = None, - quantiles: List[float] = [0.1, 0.5, 0.9], + quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), num_parallel_samples: int = 100, ) -> None: @@ -74,6 +73,9 @@ def __init__( assert ( context_length is None or context_length > 0 ), "The value of `context_length` should be > 0" + assert quantiles is None or all( + [0 <= d <= 1 for d in quantiles] + ), "Elements of `quantiles` should be >= 0 and <= 1" super().__init__(trainer=trainer) @@ -82,7 +84,9 @@ def __init__( ) self.prediction_length = prediction_length self.freq = freq - self.quantiles = quantiles + self.quantiles = ( + quantiles if quantiles is not None else [0.1, 0.5, 0.9] + ) self.encoder = encoder self.decoder_mlp_layer = decoder_mlp_layer self.decoder_mlp_static_dim = decoder_mlp_static_dim @@ -196,7 +200,7 @@ def __init__( decoder_mlp_static_dim: int, scaler: Scaler = NOPScaler(), context_length: Optional[int] = None, - quantiles: List[float] = list([0.1, 0.5, 0.9]), + quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), num_parallel_samples: int = 100, ) -> None: @@ -217,7 +221,6 @@ def __init__( ) -# TODO: fix mutable arguments class RNN2QRForecaster(Seq2SeqEstimator): @validated() def __init__( @@ -234,7 +237,7 @@ def __init__( encoder_rnn_bidirectional: bool = True, scaler: Scaler = NOPScaler(), context_length: Optional[int] = None, - quantiles: List[float] = list([0.1, 0.5, 0.9]), + quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), num_parallel_samples: int = 100, ) -> None: @@ -262,7 +265,6 @@ def __init__( ) -# TODO: fix mutable arguments class CNN2QRForecaster(Seq2SeqEstimator): @validated() def __init__( @@ -275,7 +277,7 @@ def __init__( decoder_mlp_static_dim: int, scaler: Scaler = NOPScaler(), context_length: Optional[int] = None, - quantiles: List[float] = list([0.1, 0.5, 0.9]), + quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), num_parallel_samples: int = 100, ) -> None: @@ -285,6 +287,7 @@ def __init__( channels_seq=[30, 30, 30], use_residual=True, use_dynamic_feat=True, + use_static_feat=True, ) super(CNN2QRForecaster, self).__init__( diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index dbae874c86..9629efd873 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -44,13 +44,12 @@ def __init__( train_sampler, enc_len: int, dec_len: int, - target_in: str = FieldName.TARGET, - observed_in: str = FieldName.OBSERVED_VALUES, + target_field=FieldName.TARGET, encoder_series_fields: Optional[List[str]] = None, decoder_series_fields: Optional[List[str]] = None, + shared_series_fields: Optional[List[str]] = None, is_pad_out: str = "is_pad", start_input_field: str = "start", - forecast_start_output_field: str = "forecast_start", ) -> None: assert enc_len > 0, "The value of `enc_len` should be > 0" @@ -59,17 +58,24 @@ def __init__( self.train_sampler = train_sampler self.enc_len = enc_len self.dec_len = dec_len - self.target_in = target_in - self.observed_in = observed_in - self.is_pad_out = is_pad_out - self.start_in = start_input_field - self.forecast_start_out = forecast_start_output_field - self.ts_fields = ( + self.target_field = target_field + + self.encoder_series_fields = ( encoder_series_fields if encoder_series_fields is not None else [] ) self.decoder_series_fields = ( decoder_series_fields if decoder_series_fields is not None else [] ) + # defines the fields that are shared among encoder and decoder, + # this includes the target by default + self.shared_series_fields = ( + shared_series_fields + [self.target_field] + if shared_series_fields is not None + else [self.target_field] + ) + + self.is_pad_out = is_pad_out + self.start_in = start_input_field def _past(self, col_name): return f"past_{col_name}" @@ -80,7 +86,7 @@ def _future(self, col_name): def flatmap_transform( self, data: DataEntry, is_train: bool ) -> Iterator[DataEntry]: - target = data[self.target_in] + target = data[self.target_field] if is_train: # We currently cannot handle time series that are shorter than the @@ -97,11 +103,13 @@ def flatmap_transform( sampling_indices = [len(target)] decoder_fields = set( - [self.target_in, self.observed_in] + self.decoder_series_fields + self.shared_series_fields + self.decoder_series_fields ) ts_fields_counter = Counter( - self.ts_fields + [self.target_in] + self.decoder_series_fields + self.encoder_series_fields + + self.shared_series_fields + + self.decoder_series_fields ) for sampling_idx in sampling_indices: @@ -129,16 +137,13 @@ def flatmap_transform( out[self._past(ts_field)] = past_piece.transpose() # in prediction mode, don't provide decode-values - if not is_train and ( - ts_field in [self.target_in, self.observed_in] - ): + if not is_train and (ts_field in self.shared_series_fields): continue if ts_field in decoder_fields: - d3: Any = () if ts_field in [ - self.target_in, - self.observed_in, - ] else (len(ts),) + d3: Any = () if ts_field in self.shared_series_fields else ( + len(ts), + ) forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len) + d3 ) @@ -158,8 +163,8 @@ def flatmap_transform( pad_indicator[:pad_length] = True out[self._past(self.is_pad_out)] = pad_indicator - # So far pad forecast_start_out not in use - out[self.forecast_start_out] = shift_timestamp( + # So far pad forecast_start not in use + out[FieldName.FORECAST_START] = shift_timestamp( out[self.start_in], sampling_idx ) diff --git a/test/model/seq2seq/test_forking_sequence_splitter.py b/test/model/seq2seq/test_forking_sequence_splitter.py index 42d2881bc6..d88338d0ab 100644 --- a/test/model/seq2seq/test_forking_sequence_splitter.py +++ b/test/model/seq2seq/test_forking_sequence_splitter.py @@ -124,7 +124,6 @@ def make_dataset(N, train_length): train_sampler=TSplitSampler(), enc_len=5, dec_len=3, - target_in=FieldName.TARGET, encoder_series_fields=[ FieldName.FEAT_AGE, FieldName.FEAT_TIME, diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 9da983308c..9cbd5ebfed 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -50,7 +50,7 @@ def test_accuracy( num_batches_per_epoch=100, hybridize=hybridize, quantiles=quantiles ) - accuracy_test(Estimator, hyperparameters, accuracy=0.25) + accuracy_test(Estimator, hyperparameters, accuracy=0.20) @pytest.mark.parametrize("use_feat_dynamic_real", [True, False]) From a3bf607b4fd4c4fe1bc6b0b8a208ec61041fb6eb Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 21 Apr 2020 18:42:19 +0200 Subject: [PATCH 24/44] Addressing Jaspers Review --- src/gluonts/model/estimator.py | 13 ++++--------- src/gluonts/model/seq2seq/_forking_estimator.py | 8 ++++---- src/gluonts/model/seq2seq/_mq_dnn_estimator.py | 12 ++++++------ src/gluonts/model/seq2seq/_seq2seq_estimator.py | 2 +- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index 0668d19ce6..121bc61cb0 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -77,12 +77,9 @@ def derive_auto_fields(cls, train_iter): def from_inputs(cls, train_iter, **params): # auto_params usually include `use_feat_dynamic_real`, `use_feat_static_cat` and `cardinality` auto_params = cls.derive_auto_fields(train_iter) - # FIXME: probably params should take precedence over auto_params, since they were deliberately set, - # however, on that case this method does not make sense, since if params says `use_feat_dynamic_real`=True - # but `auto_params`=False, then this will lead to an error, since the appropriate data does not exist. - # This the only context in which this method makes sense is when auto_params take precedence, which could - # lead to overwriting of explicit parameters. In this case a warning should be issued. - return cls.from_hyperparameters(**auto_params, **params) + # user specified 'params' will take precedence: + params = {**auto_params, **params} + return cls.from_hyperparameters(params) class DummyEstimator(Estimator): @@ -141,9 +138,7 @@ def from_hyperparameters(cls, **hyperparameters) -> "GluonEstimator": ) try: - trainer = hyperparameters.get("trainer") - if not isinstance(trainer, Trainer): - trainer = from_hyperparameters(Trainer, **hyperparameters) + trainer = from_hyperparameters(Trainer, **hyperparameters) return cls( **Model(**{**hyperparameters, "trainer": trainer}).__dict__ diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index b4a3f73de0..981fbc9bae 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -139,14 +139,14 @@ def __init__( assert ( prediction_length > 0 ), "The value of `prediction_length` should be > 0" - assert (cardinality and use_feat_static_cat) or ( - not (cardinality or use_feat_static_cat) + assert ( + use_feat_static_cat or not cardinality ), "You should set `cardinality` if and only if `use_feat_static_cat=True`" assert cardinality is None or all( - [c > 0 for c in cardinality] + c > 0 for c in cardinality ), "Elements of `cardinality` should be > 0" assert embedding_dimension is None or all( - [e > 0 for e in embedding_dimension] + e > 0 for e in embedding_dimension ), "Elements of `embedding_dimension` should be > 0" self.encoder = encoder diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 1a896988b4..6dcd57766c 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -63,17 +63,17 @@ def __init__( d > 0 for d in decoder_mlp_dim_seq ), "Elements of `mlp_hidden_dimension_seq` should be > 0" assert channels_seq is None or all( - [d > 0 for d in channels_seq] + d > 0 for d in channels_seq ), "Elements of `channels_seq` should be > 0" assert dilation_seq is None or all( - [d > 0 for d in dilation_seq] + d > 0 for d in dilation_seq ), "Elements of `dilation_seq` should be > 0" # TODO: add support for kernel size=1 assert kernel_size_seq is None or all( - [d > 1 for d in kernel_size_seq] + d > 1 for d in kernel_size_seq ), "Elements of `kernel_size_seq` should be > 0" assert quantiles is None or all( - [0 <= d <= 1 for d in quantiles] + 0 <= d <= 1 for d in quantiles ), "Elements of `quantiles` should be >= 0 and <= 1" self.decoder_mlp_dim_seq = ( @@ -176,10 +176,10 @@ def __init__( prediction_length > 0 ), f"Invalid prediction length: {prediction_length}." assert decoder_mlp_dim_seq is None or all( - [d > 0 for d in decoder_mlp_dim_seq] + d > 0 for d in decoder_mlp_dim_seq ), "Elements of `mlp_hidden_dimension_seq` should be > 0" assert quantiles is None or all( - [0 <= d <= 1 for d in quantiles] + 0 <= d <= 1 for d in quantiles ), "Elements of `quantiles` should be >= 0 and <= 1" self.decoder_mlp_dim_seq = ( diff --git a/src/gluonts/model/seq2seq/_seq2seq_estimator.py b/src/gluonts/model/seq2seq/_seq2seq_estimator.py index cf9756300b..14712ec66f 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_estimator.py +++ b/src/gluonts/model/seq2seq/_seq2seq_estimator.py @@ -74,7 +74,7 @@ def __init__( context_length is None or context_length > 0 ), "The value of `context_length` should be > 0" assert quantiles is None or all( - [0 <= d <= 1 for d in quantiles] + 0 <= d <= 1 for d in quantiles ), "Elements of `quantiles` should be >= 0 and <= 1" super().__init__(trainer=trainer) From ed594aca0e7784f3c9db0d2194a90fc9facbf09e Mon Sep 17 00:00:00 2001 From: Jasper Schulz Date: Wed, 22 Apr 2020 16:41:51 +0200 Subject: [PATCH 25/44] Update src/gluonts/model/estimator.py --- src/gluonts/model/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index 121bc61cb0..2558965e79 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -79,7 +79,7 @@ def from_inputs(cls, train_iter, **params): auto_params = cls.derive_auto_fields(train_iter) # user specified 'params' will take precedence: params = {**auto_params, **params} - return cls.from_hyperparameters(params) + return cls.from_hyperparameters(**params) class DummyEstimator(Estimator): From edc17fb9dda9bf4016d95affbf143db922d3407f Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Thu, 23 Apr 2020 20:58:09 +0200 Subject: [PATCH 26/44] Backwards compatibility and minor fixes. --- src/gluonts/dataset/stat.py | 10 ++++-- src/gluonts/model/estimator.py | 2 +- src/gluonts/model/predictor.py | 5 ++- .../model/seq2seq/_forking_estimator.py | 4 +++ src/gluonts/transform/field.py | 11 ++++++- test/model/seq2seq/test_model.py | 32 +++++++++++++++++++ 6 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/gluonts/dataset/stat.py b/src/gluonts/dataset/stat.py index 191c6f02b9..633757b219 100644 --- a/src/gluonts/dataset/stat.py +++ b/src/gluonts/dataset/stat.py @@ -139,7 +139,9 @@ def __eq__(self, other): # TODO: reorganize modules to avoid circular dependency # TODO: and substitute Any with Dataset -def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: +def calculate_dataset_statistics( + ts_dataset: Any, backwards_compatibility=True +) -> DatasetStatistics: """ Computes the statistics of a given Dataset. @@ -147,6 +149,9 @@ def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: ---------- ts_dataset Dataset of which to compute the statistics. + backwards_compatibility + Ensures backwards compatibility regarding the naming of certain Fields. + For example, 'dynamic_feat' is also accepted as FieldName.FEAT_DYNAMIC_REAL Returns ------- @@ -300,7 +305,7 @@ def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: feat_dynamic_real = ( ts[FieldName.FEAT_DYNAMIC_REAL] if FieldName.FEAT_DYNAMIC_REAL in ts - else None + else (ts["dynamic_feat"] if "dynamic_feat" in ts else None) ) if feat_dynamic_real is None: @@ -316,6 +321,7 @@ def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: if num_feat_dynamic_real is None: # first num_feat_dynamic_real found num_feat_dynamic_real = feat_dynamic_real.shape[0] + # TODO: could assert that always same feat_dynamic_real key is used else: assert_data_error( num_feat_dynamic_real == feat_dynamic_real.shape[0], diff --git a/src/gluonts/model/estimator.py b/src/gluonts/model/estimator.py index 121bc61cb0..2558965e79 100644 --- a/src/gluonts/model/estimator.py +++ b/src/gluonts/model/estimator.py @@ -79,7 +79,7 @@ def from_inputs(cls, train_iter, **params): auto_params = cls.derive_auto_fields(train_iter) # user specified 'params' will take precedence: params = {**auto_params, **params} - return cls.from_hyperparameters(params) + return cls.from_hyperparameters(**params) class DummyEstimator(Estimator): diff --git a/src/gluonts/model/predictor.py b/src/gluonts/model/predictor.py index a3031107de..3b474190db 100644 --- a/src/gluonts/model/predictor.py +++ b/src/gluonts/model/predictor.py @@ -162,8 +162,11 @@ def derive_auto_fields(cls, train_iter): @classmethod def from_inputs(cls, train_iter, **params): + # auto_params usually include `use_feat_dynamic_real`, `use_feat_static_cat` and `cardinality` auto_params = cls.derive_auto_fields(train_iter) - return cls.from_hyperparameters(**auto_params, **params) + # user specified 'params' will take precedence: + params = {**auto_params, **params} + return cls.from_hyperparameters(**params) class RepresentablePredictor(Predictor): diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 981fbc9bae..114a2f99a3 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -232,6 +232,10 @@ def create_transformation(self) -> Transformation: dynamic_feat_fields.append(FieldName.FEAT_AGE) if self.use_feat_dynamic_real: + # Backwards compatibility: + chain.append( + RenameFields({"dynamic_feat": FieldName.FEAT_DYNAMIC_REAL}) + ) dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) # we need to make sure that there is always some dynamic input diff --git a/src/gluonts/transform/field.py b/src/gluonts/transform/field.py index 2e39320c22..e126051749 100644 --- a/src/gluonts/transform/field.py +++ b/src/gluonts/transform/field.py @@ -22,7 +22,7 @@ class RenameFields(SimpleTransformation): """ - Rename fields using a mapping + Rename fields using a mapping, if source field present. Parameters ---------- @@ -48,6 +48,15 @@ def transform(self, data: DataEntry): class RemoveFields(SimpleTransformation): + """" + Remove field names if present. + + Parameters + ---------- + field_names + List of names of the fields that will be removed + """ + @validated() def __init__(self, field_names: List[str]) -> None: self.field_names = field_names diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 9cbd5ebfed..6fcd628945 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -93,3 +93,35 @@ def test_repr(Estimator, repr_test, hyperparameters): def test_serialize(Estimator, serialize_test, hyperparameters): serialize_test(Estimator, hyperparameters) + + +def test_backwards_compatibility(): + hps = { + "freq": "D", + "prediction_length": 3, + "quantiles": [0.5, 0.1], + "epochs": 3, + "num_batches_per_epoch": 3, + "use_feat_dynamic_real": True, + } + + dataset_train, dataset_test = make_dummy_datasets_with_features( + cardinality=[3, 10], + num_feat_dynamic_real=2, + freq=hps["freq"], + prediction_length=hps["prediction_length"], + ) + + for entry in dataset_train: + entry["dynamic_feat"] = entry["feat_dynamic_real"] + del entry["feat_dynamic_real"] + + for entry in dataset_test: + entry["dynamic_feat"] = entry["feat_dynamic_real"] + del entry["feat_dynamic_real"] + + estimator = MQCNNEstimator.from_inputs(dataset_train, **hps) + + predictor = estimator.train(dataset_train) + forecasts = list(predictor.predict(dataset_test)) + assert len(forecasts) == len(dataset_test) From 32271e7e2b439deba477ababa60d424149d83529 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Thu, 30 Apr 2020 17:39:52 +0200 Subject: [PATCH 27/44] Improvements to model thoughput. --- src/gluonts/dataset/loader.py | 5 +++-- src/gluonts/model/seq2seq/_forking_estimator.py | 15 ++++----------- src/gluonts/model/seq2seq/_transform.py | 4 ++++ src/gluonts/transform/feature.py | 10 +++++----- src/gluonts/transform/field.py | 3 +-- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/gluonts/dataset/loader.py b/src/gluonts/dataset/loader.py index 0b749bdcc5..ea363c43c8 100644 --- a/src/gluonts/dataset/loader.py +++ b/src/gluonts/dataset/loader.py @@ -133,7 +133,7 @@ class TrainDataLoader(DataLoader): Note that using large prefetching batch will provide smoother bootstrapping performance, but will consume more shared_memory. Using smaller number may forfeit the purpose of using multiple worker processes, try reduce `num_workers` in this case. - By default it defaults to `num_workers * 2`. + By default `num_workers * 2`. dtype Floating point type to use. Default is np.float32. shuffle_for_training @@ -141,6 +141,7 @@ class TrainDataLoader(DataLoader): num_batches_for_shuffling The effective number of batches among which samples are shuffled. If num_batches_for_shuffling = 8 and batch_size = 8 then the next batch will be randomly sampled from about 64 samples. + By default 1, since this can have a hit on throughput. """ def __init__( @@ -154,7 +155,7 @@ def __init__( num_prefetch: Optional[int] = None, dtype: DType = np.float32, shuffle_for_training: bool = True, - num_batches_for_shuffling: int = 8, + num_batches_for_shuffling: int = 1, **kwargs ) -> None: assert dataset, "empty dataset" diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 114a2f99a3..0ae5775934 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -195,9 +195,6 @@ def create_transformation(self) -> Transformation: chain.extend( [ RemoveFields(field_names=remove_field_names), - AsNumpyArray( - field=FieldName.TARGET, expected_ndim=1, dtype=self.dtype - ), AddObservedValuesIndicator( target_field=FieldName.TARGET, output_field=FieldName.OBSERVED_VALUES, @@ -268,15 +265,11 @@ def create_transformation(self) -> Transformation: if not self.use_feat_static_cat: chain.append( - SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0]), + SetField( + output_field=FieldName.FEAT_STATIC_CAT, + value=np.array([0.0]), + ), ) - chain.append( - AsNumpyArray( - field=FieldName.FEAT_STATIC_CAT, - expected_ndim=1, - dtype=self.dtype, - ), - ) # --- SAMPLE AND CUT THE TIME-SERIES --- diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 9629efd873..717f99d383 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -116,6 +116,8 @@ def flatmap_transform( # ensure start index is not negative start_idx = max(0, sampling_idx - self.enc_len) + # irrelevant data should have been removed by now in the + # transformation chain, so copying everything is ok out = data.copy() for ts_field in list(ts_fields_counter.keys()): @@ -149,6 +151,8 @@ def flatmap_transform( ) skip = max(0, self.enc_len - sampling_idx) + # This section takes by far the longest time computationally: + # This scales linearly in self.enc_len and linearly in self.dec_len for dec_field, idx in zip( forking_dec_field[skip:], range(start_idx + 1, start_idx + self.enc_len + 1), diff --git a/src/gluonts/transform/feature.py b/src/gluonts/transform/feature.py index d382a305a0..44afa8e99a 100644 --- a/src/gluonts/transform/feature.py +++ b/src/gluonts/transform/feature.py @@ -68,15 +68,15 @@ def __init__( def transform(self, data: DataEntry) -> DataEntry: value = data[self.target_field] - nan_indices = np.where(np.isnan(value)) nan_entries = np.isnan(value) if self.convert_nans: - value[nan_indices] = self.dummy_value + value[np.where(nan_entries)] = self.dummy_value + data[self.target_field] = value - data[self.target_field] = value - # Invert bool array so that missing values are zeros and store as float - data[self.output_field] = np.invert(nan_entries).astype(self.dtype) + data[self.output_field] = np.invert( + nan_entries, out=nan_entries + ).astype(self.dtype, copy=False) return data diff --git a/src/gluonts/transform/field.py b/src/gluonts/transform/field.py index e126051749..d390b65bbf 100644 --- a/src/gluonts/transform/field.py +++ b/src/gluonts/transform/field.py @@ -63,8 +63,7 @@ def __init__(self, field_names: List[str]) -> None: def transform(self, data: DataEntry) -> DataEntry: for k in self.field_names: - if k in data.keys(): - del data[k] + data.pop(k, None) return data From 7920a03a869f888693cc3a810ab6971015a61da6 Mon Sep 17 00:00:00 2001 From: Bernie Wang Date: Fri, 1 May 2020 23:21:23 -0700 Subject: [PATCH 28/44] allow decoding features --- src/gluonts/block/decoder.py | 96 +++++++++++++++++++ .../model/seq2seq/_forking_estimator.py | 1 + 2 files changed, 97 insertions(+) diff --git a/src/gluonts/block/decoder.py b/src/gluonts/block/decoder.py index deabc87175..5001c6dacb 100644 --- a/src/gluonts/block/decoder.py +++ b/src/gluonts/block/decoder.py @@ -138,6 +138,102 @@ def hybrid_forward( return mlp_output +class ForkingMLPDecoderWithFutureFeat(Seq2SeqDecoder): + """ + Multilayer perceptron decoder for sequence-to-sequence models. + + See [WTN+17]_ for details. + + Parameters + ---------- + dec_len + length of the decoder (usually the number of forecasted time steps). + + final_dim + dimensionality of the output per time step (number of predicted + quantiles). + + hidden_dimension_sequence + number of hidden units for each MLP layer. + """ + + @validated() + def __init__( + self, + dec_len: int, + final_dim: int, + hidden_dimension_sequence: List[int] = list([]), + **kwargs, + ) -> None: + super().__init__(**kwargs) + + self.dec_len = dec_len + self.final_dims = final_dim + + with self.name_scope(): + self.model = nn.HybridSequential() + + for layer_no, layer_dim in enumerate(hidden_dimension_sequence): + layer = nn.Dense( + dec_len * layer_dim, + flatten=False, + activation="relu", + prefix=f"mlp_{layer_no:#02d}'_", + ) + self.model.add(layer) + + layer = nn.Dense( + dec_len * final_dim, + flatten=False, + activation="softrelu", + prefix=f"mlp_{len(hidden_dimension_sequence):#02d}'_", + ) + self.model.add(layer) + + # TODO: add support for static input at some point + def hybrid_forward( + self, + F, + dynamic_input: Tensor, + dynamic_input_decode: Tensor, + static_input: Tensor = None, + ) -> Tensor: + """ + ForkingMLPDecoder forward call. + + Parameters + ---------- + F + A module that can either refer to the Symbol API or the NDArray + API in MXNet. + + dynamic_input + dynamic_features, shape (batch_size, encoder_length, num_features) + or (N, T, C). + + dynamic_input + dynamic_features, shape (batch_size, encoder_length, decoder_length, num_features) + or (N, T, T, C). + + static_input + not used in this decoder. + + Returns + ------- + Tensor + mlp output, shape (batch_size, encoder_length, dec_len, final_dims). + + """ + mlp_output = self.model(dynamic_input) + mlp_output = mlp_output.reshape( + shape=(0, 0, self.dec_len, self.final_dims) + ) + mlp_output = F.concat( + mlp_output, dynamic_input_decode, dim=-1 + ) # TODO: would -1 work? + return mlp_output + + class OneShotDecoder(Seq2SeqDecoder): """ OneShotDecoder. diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 0ae5775934..7211fa9904 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -281,6 +281,7 @@ def create_transformation(self) -> Transformation: enc_len=self.context_length, dec_len=self.prediction_length, encoder_series_fields=[FieldName.FEAT_DYNAMIC], + # decoder_series_fileds=[FieldName.FEAT_TIME], shared_series_fields=[FieldName.OBSERVED_VALUES], ), ) From fdb011df7fd395e932d69ec9f2a4782f9c3e5798 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 4 May 2020 16:55:58 +0200 Subject: [PATCH 29/44] Temprorariliy added unconditional caching. --- src/gluonts/dataset/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gluonts/dataset/common.py b/src/gluonts/dataset/common.py index 7a5fba678d..c9c8544c50 100644 --- a/src/gluonts/dataset/common.py +++ b/src/gluonts/dataset/common.py @@ -195,7 +195,8 @@ def __init__( path: Path, freq: str, one_dim_target: bool = True, - cache: bool = False, + # FIXME: only changed this temporarily + cache: bool = True, ) -> None: self.cache = cache self.path = path From b2800467655d83349a4a9218c37a9ac4e3faf2cf Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 4 May 2020 18:21:50 +0200 Subject: [PATCH 30/44] Enabled multiprocessing by default. --- src/gluonts/dataset/parallelized_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gluonts/dataset/parallelized_loader.py b/src/gluonts/dataset/parallelized_loader.py index 7121204e02..c4fc342512 100644 --- a/src/gluonts/dataset/parallelized_loader.py +++ b/src/gluonts/dataset/parallelized_loader.py @@ -576,8 +576,8 @@ def __init__( self.dtype = dtype - # TODO: switch to default multiprocessing.cpu_count() here - default_num_workers = 0 + # FIXME: switched permanently on for MQCNN + default_num_workers = multiprocessing.cpu_count() self.num_workers = ( num_workers if num_workers is not None From 2ed3c1957d1fde2fc8a59e5616e70636822d196b Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 4 May 2020 19:59:50 +0200 Subject: [PATCH 31/44] Standartized comments. --- src/gluonts/dataset/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gluonts/dataset/common.py b/src/gluonts/dataset/common.py index c9c8544c50..544e448535 100644 --- a/src/gluonts/dataset/common.py +++ b/src/gluonts/dataset/common.py @@ -195,7 +195,7 @@ def __init__( path: Path, freq: str, one_dim_target: bool = True, - # FIXME: only changed this temporarily + # FIXME: switched permanently on for MQCNN cache: bool = True, ) -> None: self.cache = cache From 1c57e45d504bb7f9ba18cac40dc529b96a596cdd Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 4 May 2020 20:40:34 +0200 Subject: [PATCH 32/44] Small bug fixes. --- src/gluonts/dataset/parallelized_loader.py | 2 +- src/gluonts/model/seq2seq/_transform.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gluonts/dataset/parallelized_loader.py b/src/gluonts/dataset/parallelized_loader.py index c4fc342512..67ddbae515 100644 --- a/src/gluonts/dataset/parallelized_loader.py +++ b/src/gluonts/dataset/parallelized_loader.py @@ -577,7 +577,7 @@ def __init__( self.dtype = dtype # FIXME: switched permanently on for MQCNN - default_num_workers = multiprocessing.cpu_count() + default_num_workers = int(multiprocessing.cpu_count() * (3 / 5)) self.num_workers = ( num_workers if num_workers is not None diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index 717f99d383..a25cf10ef7 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -32,7 +32,7 @@ def pad_to_size(xs, size): return xs pad_width = ([(0, 0)] * (xs.ndim - 1)) + [(pad_length, 0)] - return np.pad(xs, pad_width) + return np.pad(xs, mode="constant", pad_width=pad_width) class ForkingSequenceSplitter(FlatMapTransformation): From 4fce760dde8e6c8bd5f1da2e92c4c9db010f712d Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 5 May 2020 20:31:23 +0200 Subject: [PATCH 33/44] making caching and multiprocessing always on a local change --- src/gluonts/dataset/common.py | 3 +- src/gluonts/dataset/parallelized_loader.py | 4 +-- .../model/seq2seq/_mq_dnn_estimator.py | 31 +++++++++++++++++++ .../entry_point_scripts/train_entry_point.py | 2 +- test/dataset/test_variable_length.py | 6 ++-- test/model/seq2seq/test_model.py | 3 ++ 6 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/gluonts/dataset/common.py b/src/gluonts/dataset/common.py index 544e448535..7a5fba678d 100644 --- a/src/gluonts/dataset/common.py +++ b/src/gluonts/dataset/common.py @@ -195,8 +195,7 @@ def __init__( path: Path, freq: str, one_dim_target: bool = True, - # FIXME: switched permanently on for MQCNN - cache: bool = True, + cache: bool = False, ) -> None: self.cache = cache self.path = path diff --git a/src/gluonts/dataset/parallelized_loader.py b/src/gluonts/dataset/parallelized_loader.py index 67ddbae515..7121204e02 100644 --- a/src/gluonts/dataset/parallelized_loader.py +++ b/src/gluonts/dataset/parallelized_loader.py @@ -576,8 +576,8 @@ def __init__( self.dtype = dtype - # FIXME: switched permanently on for MQCNN - default_num_workers = int(multiprocessing.cpu_count() * (3 / 5)) + # TODO: switch to default multiprocessing.cpu_count() here + default_num_workers = 0 self.num_workers = ( num_workers if num_workers is not None diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 6dcd57766c..6d25aef9b2 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -12,6 +12,7 @@ # permissions and limitations under the License. # Standard library imports +import multiprocessing from typing import List, Optional # Third-party imports @@ -19,6 +20,7 @@ import mxnet as mx # First-party imports +from gluonts.dataset.common import Dataset, ListDataset from gluonts.dataset.stat import calculate_dataset_statistics from gluonts.block.decoder import ForkingMLPDecoder from gluonts.block.encoder import HierarchicalCausalConv1DEncoder, RNNEncoder @@ -154,6 +156,35 @@ def derive_auto_fields(cls, train_iter): "cardinality": [len(cats) for cats in stats.feat_static_cat], } + # FIXME: for now we always want the dataset to be cached and utilize multiprocessing. + def train( + self, + training_data: Dataset, + validation_data: Optional[Dataset] = None, + num_workers: Optional[int] = None, + **kwargs, + ): + cached_train_data = ListDataset( + data_iter=list(training_data), freq=self.freq + ) + cached_validation_data = ( + None + if validation_data is None + else ListDataset(data_iter=list(validation_data), freq=self.freq) + ) + num_workers = ( + num_workers + if num_workers is not None + else int(multiprocessing.cpu_count() * (1 / 2)) + ) + + return super().train( + training_data=cached_train_data, + validation_data=cached_validation_data, + num_workers=num_workers, + **kwargs, + ) + class MQRNNEstimator(ForkingSeq2SeqEstimator): """ diff --git a/src/gluonts/nursery/sagemaker_sdk/entry_point_scripts/train_entry_point.py b/src/gluonts/nursery/sagemaker_sdk/entry_point_scripts/train_entry_point.py index 62b2cf535e..5f57b79a89 100644 --- a/src/gluonts/nursery/sagemaker_sdk/entry_point_scripts/train_entry_point.py +++ b/src/gluonts/nursery/sagemaker_sdk/entry_point_scripts/train_entry_point.py @@ -73,7 +73,7 @@ def train(arguments): evaluator = Evaluator(quantiles=eval(arguments.quantiles)) agg_metrics, item_metrics = evaluator( - ts_it, forecast_it, num_series=len(dataset.test) + ts_it, forecast_it, num_series=len(list(dataset.test)) ) # required for metric tracking. diff --git a/test/dataset/test_variable_length.py b/test/dataset/test_variable_length.py index 5ac7a8d79a..8734e141a0 100644 --- a/test/dataset/test_variable_length.py +++ b/test/dataset/test_variable_length.py @@ -100,9 +100,11 @@ def train_loader( kwargs.update(override_args) if is_train: - return TrainDataLoader(num_batches_per_epoch=22, **kwargs) + return TrainDataLoader( + num_batches_per_epoch=22, num_workers=0, **kwargs + ) else: - return InferenceDataLoader(**kwargs) + return InferenceDataLoader(num_workers=0, **kwargs) return train_loader diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 6fcd628945..9deae10319 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -31,6 +31,7 @@ def hyperparameters(dsinfo): num_batches_per_epoch=1, quantiles=[0.1, 0.5, 0.9], use_symbol_block_predictor=True, + num_workers=0, ) @@ -71,6 +72,7 @@ def test_mqcnn_covariate_smoke_test( "add_time_feature": add_time_feature, "add_age_feature": add_age_feature, "hybridize": hybridize, + "num_workers": 0, } dataset_train, dataset_test = make_dummy_datasets_with_features( @@ -103,6 +105,7 @@ def test_backwards_compatibility(): "epochs": 3, "num_batches_per_epoch": 3, "use_feat_dynamic_real": True, + "num_workers": 0, } dataset_train, dataset_test = make_dummy_datasets_with_features( From 12673bc1bc76e8d86112ae2a3e141b65f379b609 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 5 May 2020 20:35:02 +0200 Subject: [PATCH 34/44] mend --- src/gluonts/model/seq2seq/_mq_dnn_estimator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 6d25aef9b2..88dfb695bc 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -162,6 +162,7 @@ def train( training_data: Dataset, validation_data: Optional[Dataset] = None, num_workers: Optional[int] = None, + num_prefetch: Optional[int] = None, **kwargs, ): cached_train_data = ListDataset( @@ -182,6 +183,7 @@ def train( training_data=cached_train_data, validation_data=cached_validation_data, num_workers=num_workers, + num_prefetch=num_prefetch, **kwargs, ) From a020a4775a554e15a7d9be348b3e73e74e8b2e8c Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 8 May 2020 12:00:47 +0200 Subject: [PATCH 35/44] Backwards compatibility fix. --- src/gluonts/block/decoder.py | 37 +++++++++---------- src/gluonts/dataset/field_names.py | 1 + src/gluonts/dataset/stat.py | 34 +++++++---------- src/gluonts/model/seq2seq/_forking_network.py | 10 ++++- src/gluonts/model/seq2seq/_transform.py | 5 +++ test/model/deepstate/test_model.py | 2 +- test/model/seq2seq/test_model.py | 18 +++++---- 7 files changed, 58 insertions(+), 49 deletions(-) diff --git a/src/gluonts/block/decoder.py b/src/gluonts/block/decoder.py index 5001c6dacb..6bbc816b2c 100644 --- a/src/gluonts/block/decoder.py +++ b/src/gluonts/block/decoder.py @@ -77,7 +77,7 @@ def __init__( self, dec_len: int, final_dim: int, - hidden_dimension_sequence: List[int] = list([]), + hidden_dimension_sequence: List[int] = [], **kwargs, ) -> None: super().__init__(**kwargs) @@ -117,11 +117,10 @@ def hybrid_forward( F A module that can either refer to the Symbol API or the NDArray API in MXNet. - dynamic_input - dynamic_features, shape (batch_size, sequence_length, num_features) - or (N, T, C). - + dynamic_features, shape (batch_size, sequence_length, num_features) or (N, T, C) + where sequence_length is equal to the encoder length, and num_features is equal + to channel_seq[-1] for the MQCNN for example. static_input not used in this decoder. @@ -162,7 +161,7 @@ def __init__( self, dec_len: int, final_dim: int, - hidden_dimension_sequence: List[int] = list([]), + hidden_dimension_sequence: List[int] = [], **kwargs, ) -> None: super().__init__(**kwargs) @@ -194,9 +193,9 @@ def __init__( def hybrid_forward( self, F, + dynamic_output_encoder: Tensor, dynamic_input: Tensor, - dynamic_input_decode: Tensor, - static_input: Tensor = None, + static_output_encoder: Tensor = None, ) -> Tensor: """ ForkingMLPDecoder forward call. @@ -206,16 +205,14 @@ def hybrid_forward( F A module that can either refer to the Symbol API or the NDArray API in MXNet. - + dynamic_input_encoder + dynamic_features, shape (batch_size, sequence_length, num_features) or (N, T, C) + where sequence_length is equal to the encoder length, and num_features is equal + to channel_seq[-1] for the MQCNN for example. dynamic_input - dynamic_features, shape (batch_size, encoder_length, num_features) + dynamic_features, shape (batch_size, encoder_length, decoder_length, num_features_02) or (N, T, C). - - dynamic_input - dynamic_features, shape (batch_size, encoder_length, decoder_length, num_features) - or (N, T, T, C). - - static_input + static_input_encoder not used in this decoder. Returns @@ -224,13 +221,13 @@ def hybrid_forward( mlp output, shape (batch_size, encoder_length, dec_len, final_dims). """ - mlp_output = self.model(dynamic_input) + mlp_output = self.model(dynamic_output_encoder) mlp_output = mlp_output.reshape( shape=(0, 0, self.dec_len, self.final_dims) ) - mlp_output = F.concat( - mlp_output, dynamic_input_decode, dim=-1 - ) # TODO: would -1 work? + # mlp_output = F.concat( + # mlp_output, dynamic_input, dim=-1 + # ) # TODO: would -1 work? return mlp_output diff --git a/src/gluonts/dataset/field_names.py b/src/gluonts/dataset/field_names.py index 0e0a6ff7f8..d686c4f26f 100644 --- a/src/gluonts/dataset/field_names.py +++ b/src/gluonts/dataset/field_names.py @@ -27,6 +27,7 @@ class FieldName: FEAT_STATIC_REAL = "feat_static_real" FEAT_DYNAMIC_CAT = "feat_dynamic_cat" FEAT_DYNAMIC_REAL = "feat_dynamic_real" + FEAT_DYNAMIC_REAL_LEGACY = "dynamic_feat" FEAT_DYNAMIC = "feat_dynamic" diff --git a/src/gluonts/dataset/stat.py b/src/gluonts/dataset/stat.py index 633757b219..bc94d2604d 100644 --- a/src/gluonts/dataset/stat.py +++ b/src/gluonts/dataset/stat.py @@ -139,9 +139,7 @@ def __eq__(self, other): # TODO: reorganize modules to avoid circular dependency # TODO: and substitute Any with Dataset -def calculate_dataset_statistics( - ts_dataset: Any, backwards_compatibility=True -) -> DatasetStatistics: +def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: """ Computes the statistics of a given Dataset. @@ -149,9 +147,6 @@ def calculate_dataset_statistics( ---------- ts_dataset Dataset of which to compute the statistics. - backwards_compatibility - Ensures backwards compatibility regarding the naming of certain Fields. - For example, 'dynamic_feat' is also accepted as FieldName.FEAT_DYNAMIC_REAL Returns ------- @@ -276,14 +271,14 @@ def calculate_dataset_statistics( else: if num_feat_dynamic_cat is None: # first num_feat_dynamic_cat found - num_feat_dynamic_cat = feat_dynamic_cat.shape[0] + num_feat_dynamic_cat = len(feat_dynamic_cat) else: assert_data_error( - num_feat_dynamic_cat == feat_dynamic_cat.shape[0], + num_feat_dynamic_cat == len(feat_dynamic_cat), "Found instances with different number of features in " "feat_dynamic_cat, found one with {} and another with {}.", num_feat_dynamic_cat, - feat_dynamic_cat.shape[0], + len(feat_dynamic_cat), ) assert_data_error( @@ -291,7 +286,7 @@ def calculate_dataset_statistics( "Features values have to be finite and cannot exceed single " "precision floating point range.", ) - num_feat_dynamic_cat_time_steps = feat_dynamic_cat.shape[1] + num_feat_dynamic_cat_time_steps = len(feat_dynamic_cat[0]) assert_data_error( num_feat_dynamic_cat_time_steps == len(target), "Each feature in feat_dynamic_cat has to have the same length as " @@ -302,11 +297,11 @@ def calculate_dataset_statistics( ) # FEAT_DYNAMIC_REAL - feat_dynamic_real = ( - ts[FieldName.FEAT_DYNAMIC_REAL] - if FieldName.FEAT_DYNAMIC_REAL in ts - else (ts["dynamic_feat"] if "dynamic_feat" in ts else None) - ) + feat_dynamic_real = None + if FieldName.FEAT_DYNAMIC_REAL in ts: + feat_dynamic_real = ts[FieldName.FEAT_DYNAMIC_REAL] + elif FieldName.FEAT_DYNAMIC_REAL_LEGACY in ts: + feat_dynamic_real = ts[FieldName.FEAT_DYNAMIC_REAL_LEGACY] if feat_dynamic_real is None: # feat_dynamic_real not found, check it was the first ts we encounter or @@ -320,15 +315,14 @@ def calculate_dataset_statistics( else: if num_feat_dynamic_real is None: # first num_feat_dynamic_real found - num_feat_dynamic_real = feat_dynamic_real.shape[0] - # TODO: could assert that always same feat_dynamic_real key is used + num_feat_dynamic_real = len(feat_dynamic_real) else: assert_data_error( - num_feat_dynamic_real == feat_dynamic_real.shape[0], + num_feat_dynamic_real == len(feat_dynamic_real), "Found instances with different number of features in " "feat_dynamic_real, found one with {} and another with {}.", num_feat_dynamic_real, - feat_dynamic_real.shape[0], + len(feat_dynamic_real), ) assert_data_error( @@ -336,7 +330,7 @@ def calculate_dataset_statistics( "Features values have to be finite and cannot exceed single " "precision floating point range.", ) - num_feat_dynamic_real_time_steps = feat_dynamic_real.shape[1] + num_feat_dynamic_real_time_steps = len(feat_dynamic_real[0]) assert_data_error( num_feat_dynamic_real_time_steps == len(target), "Each feature in feat_dynamic_real has to have the same length as " diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 5e783fa769..5b55af3db7 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -145,8 +145,11 @@ def get_decoder_network_output( ) # arguments: dynamic_input, static_input + # TODO: optimize what we pass to the decoder for the prediction case, + # where we we only need to pass the encoder output for the last time step dec_output = self.decoder(dec_input_dynamic, dec_input_static) + # the output shape should be: (batch_size, enc_len, dec_len, final_dims) return dec_output @@ -159,7 +162,7 @@ def hybrid_forward( past_target: Tensor, past_feat_dynamic: Tensor, feat_static_cat: Tensor, - past_observed_values: Tensor, # FOR SOME REASON NOT USED??? + past_observed_values: Tensor, future_observed_values: Tensor, ) -> Tensor: """ @@ -175,6 +178,9 @@ def hybrid_forward( shape (batch_size, encoder_length, num_feature_static_cat) past_feat_dynamic shape (batch_size, encoder_length, num_feature_dynamic) + future_feat_dynamic + shape (batch_size, encoder_length, decoder_length, num_feature_dynamic) + # or shape (batch_size, decoder_length, num_feature_dynamic) replicated for each of the encoder steps past_observed_values: Tensor shape (batch_size, encoder_length, 1) future_observed_values: Tensor @@ -240,8 +246,10 @@ def hybrid_forward( past_observed_values, ) + # We only care about the output of the decoder for the last time step fcst_output = F.slice_axis(dec_output, axis=1, begin=-1, end=None) fcst_output = F.squeeze(fcst_output, axis=1) + predictions = self.quantile_proj(fcst_output).swapaxes(2, 1) return predictions diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index a25cf10ef7..bc4ecf4a7e 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -142,6 +142,11 @@ def flatmap_transform( if not is_train and (ts_field in self.shared_series_fields): continue + # TODO: do the same to the future dynamic feat as we do to the target + + # This is were some of the forking magic happens: + # For each of the encoder_len time-steps at which the decoder is applied we slice the + # corresponding inputs called decoder_fields to the appropriate dec_len if ts_field in decoder_fields: d3: Any = () if ts_field in self.shared_series_fields else ( len(ts), diff --git a/test/model/deepstate/test_model.py b/test/model/deepstate/test_model.py index 40bf602146..2a6216e1b4 100644 --- a/test/model/deepstate/test_model.py +++ b/test/model/deepstate/test_model.py @@ -35,7 +35,7 @@ def hyperparameters(dsinfo): def test_accuracy(accuracy_test, hyperparameters): - hyperparameters.update(num_batches_per_epoch=100) + hyperparameters.update(num_batches_per_epoch=200) accuracy_test(DeepStateEstimator, hyperparameters, accuracy=0.5) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 9deae10319..df54f1dbb8 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -115,13 +115,17 @@ def test_backwards_compatibility(): prediction_length=hps["prediction_length"], ) - for entry in dataset_train: - entry["dynamic_feat"] = entry["feat_dynamic_real"] - del entry["feat_dynamic_real"] - - for entry in dataset_test: - entry["dynamic_feat"] = entry["feat_dynamic_real"] - del entry["feat_dynamic_real"] + for i in range(len(dataset_train)): + dataset_train.list_data[i]["dynamic_feat"] = dataset_train.list_data[ + i + ]["feat_dynamic_real"] + del dataset_train.list_data[i]["feat_dynamic_real"] + + for i in range(len(dataset_test)): + dataset_test.list_data[i]["dynamic_feat"] = dataset_test.list_data[i][ + "feat_dynamic_real" + ] + del dataset_test.list_data[i]["feat_dynamic_real"] estimator = MQCNNEstimator.from_inputs(dataset_train, **hps) From e9787819bfda90095fb52e7bed0c25f3f7b4e5ec Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 8 May 2020 12:17:08 +0200 Subject: [PATCH 36/44] Removing deepstate noise. --- test/model/deepstate/test_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/model/deepstate/test_model.py b/test/model/deepstate/test_model.py index 2a6216e1b4..fd17230b14 100644 --- a/test/model/deepstate/test_model.py +++ b/test/model/deepstate/test_model.py @@ -20,7 +20,7 @@ def hyperparameters(dsinfo): return dict( ctx="cpu", - epochs=1, + epochs=3, learning_rate=1e-2, hybridize=False, num_cells=2, @@ -35,7 +35,7 @@ def hyperparameters(dsinfo): def test_accuracy(accuracy_test, hyperparameters): - hyperparameters.update(num_batches_per_epoch=200) + hyperparameters.update(num_batches_per_epoch=100) accuracy_test(DeepStateEstimator, hyperparameters, accuracy=0.5) From af5eb104af680a40a10b1b419dd32528e7648bbb Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 8 May 2020 12:41:11 +0200 Subject: [PATCH 37/44] Removing deepstate noise. --- test/dataset/test_loader.py | 2 +- test/model/deepstate/test_model.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/dataset/test_loader.py b/test/dataset/test_loader.py index e2503e6d5c..d527da54a0 100644 --- a/test/dataset/test_loader.py +++ b/test/dataset/test_loader.py @@ -109,7 +109,7 @@ def test_io_speed() -> None: # name of method, loading function and min allowed throughput fixtures = [ - ("baseline", baseline, 70_000), + ("baseline", baseline, 65_000), # ('json.loads', load_json, xxx), ("ujson.loads", load_ujson, 20_000), ("JsonLinesFile", load_json_lines_file, 10_000), diff --git a/test/model/deepstate/test_model.py b/test/model/deepstate/test_model.py index fd17230b14..0493d592e6 100644 --- a/test/model/deepstate/test_model.py +++ b/test/model/deepstate/test_model.py @@ -20,7 +20,7 @@ def hyperparameters(dsinfo): return dict( ctx="cpu", - epochs=3, + epochs=1, learning_rate=1e-2, hybridize=False, num_cells=2, @@ -37,7 +37,7 @@ def hyperparameters(dsinfo): def test_accuracy(accuracy_test, hyperparameters): hyperparameters.update(num_batches_per_epoch=100) - accuracy_test(DeepStateEstimator, hyperparameters, accuracy=0.5) + accuracy_test(DeepStateEstimator, hyperparameters, accuracy=0.75) def test_repr(repr_test, hyperparameters): From e3ad55457950e2c2b4729035d972aced414a24cd Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Fri, 8 May 2020 13:14:27 +0200 Subject: [PATCH 38/44] Adjusting read speed baseline for windows. --- test/dataset/test_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dataset/test_loader.py b/test/dataset/test_loader.py index d527da54a0..fede335e5d 100644 --- a/test/dataset/test_loader.py +++ b/test/dataset/test_loader.py @@ -109,7 +109,7 @@ def test_io_speed() -> None: # name of method, loading function and min allowed throughput fixtures = [ - ("baseline", baseline, 65_000), + ("baseline", baseline, 60_000), # ('json.loads', load_json, xxx), ("ujson.loads", load_ujson, 20_000), ("JsonLinesFile", load_json_lines_file, 10_000), From a7973a414a7092a97dd0df76b824e82cb185f803 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 12 May 2020 17:12:43 +0200 Subject: [PATCH 39/44] Added dynamic input to MQCNN decoder. --- src/gluonts/block/decoder.py | 94 ------------------- src/gluonts/block/encoder.py | 2 + .../model/seq2seq/_forking_estimator.py | 13 ++- src/gluonts/model/seq2seq/_forking_network.py | 30 ++++-- .../model/seq2seq/_mq_dnn_estimator.py | 5 +- src/gluonts/model/seq2seq/_transform.py | 46 ++++----- test/model/seq2seq/test_model.py | 6 +- 7 files changed, 60 insertions(+), 136 deletions(-) diff --git a/src/gluonts/block/decoder.py b/src/gluonts/block/decoder.py index 6bbc816b2c..ccb5bd997a 100644 --- a/src/gluonts/block/decoder.py +++ b/src/gluonts/block/decoder.py @@ -137,100 +137,6 @@ def hybrid_forward( return mlp_output -class ForkingMLPDecoderWithFutureFeat(Seq2SeqDecoder): - """ - Multilayer perceptron decoder for sequence-to-sequence models. - - See [WTN+17]_ for details. - - Parameters - ---------- - dec_len - length of the decoder (usually the number of forecasted time steps). - - final_dim - dimensionality of the output per time step (number of predicted - quantiles). - - hidden_dimension_sequence - number of hidden units for each MLP layer. - """ - - @validated() - def __init__( - self, - dec_len: int, - final_dim: int, - hidden_dimension_sequence: List[int] = [], - **kwargs, - ) -> None: - super().__init__(**kwargs) - - self.dec_len = dec_len - self.final_dims = final_dim - - with self.name_scope(): - self.model = nn.HybridSequential() - - for layer_no, layer_dim in enumerate(hidden_dimension_sequence): - layer = nn.Dense( - dec_len * layer_dim, - flatten=False, - activation="relu", - prefix=f"mlp_{layer_no:#02d}'_", - ) - self.model.add(layer) - - layer = nn.Dense( - dec_len * final_dim, - flatten=False, - activation="softrelu", - prefix=f"mlp_{len(hidden_dimension_sequence):#02d}'_", - ) - self.model.add(layer) - - # TODO: add support for static input at some point - def hybrid_forward( - self, - F, - dynamic_output_encoder: Tensor, - dynamic_input: Tensor, - static_output_encoder: Tensor = None, - ) -> Tensor: - """ - ForkingMLPDecoder forward call. - - Parameters - ---------- - F - A module that can either refer to the Symbol API or the NDArray - API in MXNet. - dynamic_input_encoder - dynamic_features, shape (batch_size, sequence_length, num_features) or (N, T, C) - where sequence_length is equal to the encoder length, and num_features is equal - to channel_seq[-1] for the MQCNN for example. - dynamic_input - dynamic_features, shape (batch_size, encoder_length, decoder_length, num_features_02) - or (N, T, C). - static_input_encoder - not used in this decoder. - - Returns - ------- - Tensor - mlp output, shape (batch_size, encoder_length, dec_len, final_dims). - - """ - mlp_output = self.model(dynamic_output_encoder) - mlp_output = mlp_output.reshape( - shape=(0, 0, self.dec_len, self.final_dims) - ) - # mlp_output = F.concat( - # mlp_output, dynamic_input, dim=-1 - # ) # TODO: would -1 work? - return mlp_output - - class OneShotDecoder(Seq2SeqDecoder): """ OneShotDecoder. diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index 76e1cd7fa9..f2a4550d85 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -111,6 +111,7 @@ def _assemble_inputs( return inputs +# TODO: fix handling of static features class HierarchicalCausalConv1DEncoder(Seq2SeqEncoder): """ Defines a stack of dilated convolutions as the encoder. @@ -216,6 +217,7 @@ def hybrid_forward( elif self.use_dynamic_feat: inputs = F.concat(target, dynamic_features, dim=2) # (N, T, C) else: + # For now, static features only used when dynamic feat enabled inputs = target # NTC -> NCT (or NCW) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 7211fa9904..cb9b5b421d 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -41,7 +41,6 @@ RenameFields, AddConstFeature, RemoveFields, - AsNumpyArray, AddObservedValuesIndicator, SetField, ) @@ -280,9 +279,15 @@ def create_transformation(self) -> Transformation: train_sampler=TestSplitSampler(), enc_len=self.context_length, dec_len=self.prediction_length, - encoder_series_fields=[FieldName.FEAT_DYNAMIC], - # decoder_series_fileds=[FieldName.FEAT_TIME], - shared_series_fields=[FieldName.OBSERVED_VALUES], + encoder_series_fields=[ + FieldName.OBSERVED_VALUES, + FieldName.FEAT_DYNAMIC, + ], + decoder_series_fields=[ + FieldName.OBSERVED_VALUES, + FieldName.FEAT_DYNAMIC, + ], + prediction_time_decoder_exclude=[FieldName.OBSERVED_VALUES], ), ) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 5b55af3db7..6a500a3729 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -104,6 +104,7 @@ def get_decoder_network_output( F, past_target: Tensor, past_feat_dynamic: Tensor, + future_feat_dynamic: Tensor, feat_static_cat: Tensor, past_observed_values: Tensor, ) -> Tensor: @@ -144,10 +145,20 @@ def get_decoder_network_output( enc_output_static, enc_output_dynamic, F.zeros(shape=(1,)) ) + # flatten the last two dimensions: + # => (batch_size, encoder_length, decoder_length * num_feature_dynamic) + future_feat_dynamic = F.reshape(future_feat_dynamic, shape=(0, 0, -1)) + + # concatenate output of decoder and future_feat_dynamic covariates: + # => (batch_size, encoder_length, num_dec_input_dynamic + num_future_feat_dynamic) + total_dec_input_dynamic = F.concat( + dec_input_dynamic, future_feat_dynamic, dim=2 + ) + # arguments: dynamic_input, static_input # TODO: optimize what we pass to the decoder for the prediction case, # where we we only need to pass the encoder output for the last time step - dec_output = self.decoder(dec_input_dynamic, dec_input_static) + dec_output = self.decoder(total_dec_input_dynamic, dec_input_static) # the output shape should be: (batch_size, enc_len, dec_len, final_dims) return dec_output @@ -158,9 +169,10 @@ class ForkingSeq2SeqTrainingNetwork(ForkingSeq2SeqNetworkBase): def hybrid_forward( self, F, - future_target: Tensor, past_target: Tensor, + future_target: Tensor, past_feat_dynamic: Tensor, + future_feat_dynamic: Tensor, feat_static_cat: Tensor, past_observed_values: Tensor, future_observed_values: Tensor, @@ -170,17 +182,16 @@ def hybrid_forward( ---------- F: mx.symbol or mx.ndarray Gluon function space - future_target: Tensor - shape (batch_size, encoder_length, decoder_length) past_target: Tensor shape (batch_size, encoder_length, 1) - feat_static_cat - shape (batch_size, encoder_length, num_feature_static_cat) + future_target: Tensor + shape (batch_size, encoder_length, decoder_length) past_feat_dynamic shape (batch_size, encoder_length, num_feature_dynamic) future_feat_dynamic shape (batch_size, encoder_length, decoder_length, num_feature_dynamic) - # or shape (batch_size, decoder_length, num_feature_dynamic) replicated for each of the encoder steps + feat_static_cat + shape (batch_size, encoder_length, num_feature_static_cat) past_observed_values: Tensor shape (batch_size, encoder_length, 1) future_observed_values: Tensor @@ -194,6 +205,7 @@ def hybrid_forward( F, past_target, past_feat_dynamic, + future_feat_dynamic, feat_static_cat, past_observed_values, ) @@ -216,6 +228,7 @@ def hybrid_forward( F, past_target: Tensor, past_feat_dynamic: Tensor, + future_feat_dynamic: Tensor, feat_static_cat: Tensor, past_observed_values: Tensor, ) -> Tensor: @@ -230,6 +243,8 @@ def hybrid_forward( shape (batch_size, encoder_length, num_feature_static_cat) past_feat_dynamic shape (batch_size, encoder_length, num_feature_dynamic) + future_feat_dynamic + shape (batch_size, encoder_length, decoder_length, num_feature_dynamic) past_observed_values: Tensor shape (batch_size, encoder_length, 1) @@ -242,6 +257,7 @@ def hybrid_forward( F, past_target, past_feat_dynamic, + future_feat_dynamic, feat_static_cat, past_observed_values, ) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 88dfb695bc..dee7412fc1 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -157,6 +157,9 @@ def derive_auto_fields(cls, train_iter): } # FIXME: for now we always want the dataset to be cached and utilize multiprocessing. + # TODO it properly: Enable caching of the dataset in the `_load_datasets` function of the shell, + # and pass `num_workers` from train_env in the `run_train_and_test` method to `run_train`, + # which in turn has to pass it to train(...) def train( self, training_data: Dataset, @@ -176,7 +179,7 @@ def train( num_workers = ( num_workers if num_workers is not None - else int(multiprocessing.cpu_count() * (1 / 2)) + else int(np.ceil(np.sqrt(multiprocessing.cpu_count()))) ) return super().train( diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index bc4ecf4a7e..a36231b2d5 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -47,7 +47,7 @@ def __init__( target_field=FieldName.TARGET, encoder_series_fields: Optional[List[str]] = None, decoder_series_fields: Optional[List[str]] = None, - shared_series_fields: Optional[List[str]] = None, + prediction_time_decoder_exclude: Optional[List[str]] = None, is_pad_out: str = "is_pad", start_input_field: str = "start", ) -> None: @@ -61,16 +61,18 @@ def __init__( self.target_field = target_field self.encoder_series_fields = ( - encoder_series_fields if encoder_series_fields is not None else [] + encoder_series_fields + [self.target_field] + if encoder_series_fields is not None + else [self.target_field] ) self.decoder_series_fields = ( - decoder_series_fields if decoder_series_fields is not None else [] + decoder_series_fields + [self.target_field] + if decoder_series_fields is not None + else [self.target_field] ) - # defines the fields that are shared among encoder and decoder, - # this includes the target by default - self.shared_series_fields = ( - shared_series_fields + [self.target_field] - if shared_series_fields is not None + self.prediction_time_decoder_exclude = ( + prediction_time_decoder_exclude + [self.target_field] + if prediction_time_decoder_exclude is not None else [self.target_field] ) @@ -102,14 +104,8 @@ def flatmap_transform( else: sampling_indices = [len(target)] - decoder_fields = set( - self.shared_series_fields + self.decoder_series_fields - ) - ts_fields_counter = Counter( - self.encoder_series_fields - + self.shared_series_fields - + self.decoder_series_fields + set(self.encoder_series_fields + self.decoder_series_fields) ) for sampling_idx in sampling_indices: @@ -138,21 +134,19 @@ def flatmap_transform( out[self._past(ts_field)] = past_piece.transpose() - # in prediction mode, don't provide decode-values - if not is_train and (ts_field in self.shared_series_fields): + # exclude some fields at prediction time + if ( + not is_train + and ts_field in self.prediction_time_decoder_exclude + ): continue - # TODO: do the same to the future dynamic feat as we do to the target - # This is were some of the forking magic happens: # For each of the encoder_len time-steps at which the decoder is applied we slice the # corresponding inputs called decoder_fields to the appropriate dec_len - if ts_field in decoder_fields: - d3: Any = () if ts_field in self.shared_series_fields else ( - len(ts), - ) + if ts_field in self.decoder_series_fields: forking_dec_field = np.zeros( - shape=(self.enc_len, self.dec_len) + d3 + shape=(self.enc_len, self.dec_len, len(ts)) ) skip = max(0, self.enc_len - sampling_idx) @@ -162,9 +156,9 @@ def flatmap_transform( forking_dec_field[skip:], range(start_idx + 1, start_idx + self.enc_len + 1), ): - dec_field[:] = ts[:, idx : idx + self.dec_len] + dec_field[:] = ts[:, idx : idx + self.dec_len].T - out[self._future(ts_field)] = forking_dec_field + out[self._future(ts_field)] = np.squeeze(forking_dec_field) # So far pad indicator not in use pad_indicator = np.zeros(self.enc_len) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index df54f1dbb8..85b3acea9d 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -31,7 +31,6 @@ def hyperparameters(dsinfo): num_batches_per_epoch=1, quantiles=[0.1, 0.5, 0.9], use_symbol_block_predictor=True, - num_workers=0, ) @@ -72,7 +71,6 @@ def test_mqcnn_covariate_smoke_test( "add_time_feature": add_time_feature, "add_age_feature": add_age_feature, "hybridize": hybridize, - "num_workers": 0, } dataset_train, dataset_test = make_dummy_datasets_with_features( @@ -84,7 +82,7 @@ def test_mqcnn_covariate_smoke_test( estimator = MQCNNEstimator.from_hyperparameters(**hps) - predictor = estimator.train(dataset_train) + predictor = estimator.train(dataset_train, num_workers=0) forecasts = list(predictor.predict(dataset_test)) assert len(forecasts) == len(dataset_test) @@ -129,6 +127,6 @@ def test_backwards_compatibility(): estimator = MQCNNEstimator.from_inputs(dataset_train, **hps) - predictor = estimator.train(dataset_train) + predictor = estimator.train(dataset_train, num_workers=0) forecasts = list(predictor.predict(dataset_test)) assert len(forecasts) == len(dataset_test) From 63b2565dc915dc525843564cc650310053853b80 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 12 May 2020 19:00:36 +0200 Subject: [PATCH 40/44] Added toggle option for dynamic future feat. --- src/gluonts/block/enc2dec.py | 83 +++++++++++++++---- .../model/seq2seq/_forking_estimator.py | 19 +++-- src/gluonts/model/seq2seq/_forking_network.py | 16 +--- .../model/seq2seq/_mq_dnn_estimator.py | 2 + src/gluonts/model/seq2seq/_seq2seq_network.py | 2 +- src/gluonts/model/seq2seq/_transform.py | 31 +++++-- test/model/seq2seq/test_model.py | 8 +- 7 files changed, 114 insertions(+), 47 deletions(-) diff --git a/src/gluonts/block/enc2dec.py b/src/gluonts/block/enc2dec.py index 7f61a63f59..9bc58f8567 100644 --- a/src/gluonts/block/enc2dec.py +++ b/src/gluonts/block/enc2dec.py @@ -38,7 +38,7 @@ def hybrid_forward( F, encoder_output_static: Tensor, encoder_output_dynamic: Tensor, - future_features: Tensor, + future_features_dynamic: Tensor, ) -> Tuple[Tensor, Tensor, Tensor]: """ Parameters @@ -48,10 +48,10 @@ def hybrid_forward( shape (batch_size, num_features) or (N, C) encoder_output_dynamic - shape (batch_size, context_length, num_features) or (N, T, C) + shape (batch_size, sequence_length, num_features) or (N, T, C) - future_features - shape (batch_size, prediction_length, num_features) or (N, T, C) + future_features_dynamic + shape (batch_size, sequence_length, prediction_length, num_features) or (N, T, P, C`) Returns @@ -59,12 +59,8 @@ def hybrid_forward( Tensor shape (batch_size, num_features) or (N, C) - Tensor - shape (batch_size, prediction_length, num_features) or (N, T, C) - Tensor shape (batch_size, sequence_length, num_features) or (N, T, C) - """ pass @@ -72,7 +68,7 @@ def hybrid_forward( class PassThroughEnc2Dec(Seq2SeqEnc2Dec): """ Simplest class for passing encoder tensors do decoder. Passes through - tensors. + tensors, except that future_features_dynamic is dropped. """ def hybrid_forward( @@ -80,8 +76,8 @@ def hybrid_forward( F, encoder_output_static: Tensor, encoder_output_dynamic: Tensor, - future_features: Tensor, - ) -> Tuple[Tensor, Tensor, Tensor]: + future_features_dynamic: Tensor, + ) -> Tuple[Tensor, Tensor]: """ Parameters ---------- @@ -90,10 +86,10 @@ def hybrid_forward( shape (batch_size, num_features) or (N, C) encoder_output_dynamic - shape (batch_size, context_length, num_features) or (N, T, C) + shape (batch_size, sequence_length, num_features) or (N, T, C) - future_features - shape (batch_size, prediction_length, num_features) or (N, T, C) + future_features_dynamic + shape (batch_size, sequence_length, prediction_length, num_features) or (N, T, P, C`) Returns @@ -102,10 +98,63 @@ def hybrid_forward( shape (batch_size, num_features) or (N, C) Tensor - shape (batch_size, prediction_length, num_features) or (N, T, C) + shape (batch_size, prediction_length, num_features_02) or (N, T, C) + """ + return encoder_output_static, encoder_output_dynamic - Tensor + +class FutureFeatIntegratorEnc2Dec(Seq2SeqEnc2Dec): + """ + Integrates the encoder_ouput_dynamic and future_features_dynamic into one + and passes them through as the dynamic input to the decoder. + """ + + def hybrid_forward( + self, + F, + encoder_output_static: Tensor, + encoder_output_dynamic: Tensor, + future_features_dynamic: Tensor, + ) -> Tuple[Tensor, Tensor]: + """ + Parameters + ---------- + + encoder_output_static + shape (batch_size, num_features) or (N, C) + + encoder_output_dynamic shape (batch_size, sequence_length, num_features) or (N, T, C) + future_features_dynamic + shape (batch_size, sequence_length, prediction_length, num_features) or (N, T, P, C`) + + + Returns + ------- + Tensor + shape (batch_size, num_features) or (N, C) + + Tensor + shape (batch_size, prediction_length, num_features_02) or (N, T, C) + + Tensor + shape (1,) """ - return encoder_output_static, encoder_output_dynamic, future_features + + # flatten the last two dimensions: + # => (batch_size, encoder_length, decoder_length * num_feature_dynamic) + future_features_dynamic = F.reshape( + future_features_dynamic, shape=(0, 0, -1) + ) + + # concatenate output of decoder and future_feat_dynamic covariates: + # => (batch_size, encoder_length, num_dec_input_dynamic + num_future_feat_dynamic) + total_dec_input_dynamic = F.concat( + encoder_output_dynamic, future_features_dynamic, dim=2 + ) + + return ( + encoder_output_static, + total_dec_input_dynamic, + ) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index cb9b5b421d..b06d1a55a2 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -19,7 +19,10 @@ # First-party imports from gluonts.block.decoder import Seq2SeqDecoder -from gluonts.block.enc2dec import PassThroughEnc2Dec +from gluonts.block.enc2dec import ( + PassThroughEnc2Dec, + FutureFeatIntegratorEnc2Dec, +) from gluonts.block.encoder import Seq2SeqEncoder from gluonts.block.quantile_output import QuantileOutput from gluonts.core.component import validated, DType @@ -127,6 +130,7 @@ def __init__( embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, + enable_decoder_dynamic_feature: bool = True, trainer: Trainer = Trainer(), dtype: DType = np.float32, ) -> None: @@ -173,6 +177,7 @@ def __init__( self.use_dynamic_feat = ( use_feat_dynamic_real or add_age_feature or add_time_feature ) + self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature self.dtype = dtype def create_transformation(self) -> Transformation: @@ -283,10 +288,12 @@ def create_transformation(self) -> Transformation: FieldName.OBSERVED_VALUES, FieldName.FEAT_DYNAMIC, ], - decoder_series_fields=[ - FieldName.OBSERVED_VALUES, - FieldName.FEAT_DYNAMIC, - ], + decoder_series_fields=[FieldName.OBSERVED_VALUES] + + ( + [FieldName.FEAT_DYNAMIC] + if self.enable_decoder_dynamic_feature + else [] + ), prediction_time_decoder_exclude=[FieldName.OBSERVED_VALUES], ), ) @@ -296,7 +303,7 @@ def create_transformation(self) -> Transformation: def create_training_network(self) -> ForkingSeq2SeqNetworkBase: return ForkingSeq2SeqTrainingNetwork( encoder=self.encoder, - enc2dec=PassThroughEnc2Dec(), + enc2dec=FutureFeatIntegratorEnc2Dec(), decoder=self.decoder, quantile_output=self.quantile_output, context_length=self.context_length, diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 6a500a3729..4bfc1a5821 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -141,24 +141,14 @@ def get_decoder_network_output( ) # arguments: encoder_output_static, encoder_output_dynamic, future_features - dec_input_static, dec_input_dynamic, _ = self.enc2dec( - enc_output_static, enc_output_dynamic, F.zeros(shape=(1,)) - ) - - # flatten the last two dimensions: - # => (batch_size, encoder_length, decoder_length * num_feature_dynamic) - future_feat_dynamic = F.reshape(future_feat_dynamic, shape=(0, 0, -1)) - - # concatenate output of decoder and future_feat_dynamic covariates: - # => (batch_size, encoder_length, num_dec_input_dynamic + num_future_feat_dynamic) - total_dec_input_dynamic = F.concat( - dec_input_dynamic, future_feat_dynamic, dim=2 + dec_input_static, dec_input_dynamic = self.enc2dec( + enc_output_static, enc_output_dynamic, future_feat_dynamic ) # arguments: dynamic_input, static_input # TODO: optimize what we pass to the decoder for the prediction case, # where we we only need to pass the encoder output for the last time step - dec_output = self.decoder(total_dec_input_dynamic, dec_input_static) + dec_output = self.decoder(dec_input_dynamic, dec_input_static) # the output shape should be: (batch_size, enc_len, dec_len, final_dims) return dec_output diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index dee7412fc1..f8576fc1af 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -48,6 +48,7 @@ def __init__( embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, + enable_decoder_dynamic_feature: bool = True, seed: Optional[int] = None, decoder_mlp_dim_seq: Optional[List[int]] = None, channels_seq: Optional[List[int]] = None, @@ -139,6 +140,7 @@ def __init__( context_length=context_length, use_feat_dynamic_real=use_feat_dynamic_real, use_feat_static_cat=use_feat_static_cat, + enable_decoder_dynamic_feature=enable_decoder_dynamic_feature, cardinality=cardinality, embedding_dimension=embedding_dimension, add_time_feature=add_time_feature, diff --git a/src/gluonts/model/seq2seq/_seq2seq_network.py b/src/gluonts/model/seq2seq/_seq2seq_network.py index 4db4e8dd8b..566b41297b 100644 --- a/src/gluonts/model/seq2seq/_seq2seq_network.py +++ b/src/gluonts/model/seq2seq/_seq2seq_network.py @@ -89,7 +89,7 @@ def compute_decoder_outputs( encoder_output_static, encoder_output_dynamic = self.encoder( scaled_target, embedded_cat, past_feat_dynamic_real ) - decoder_input_static, _, decoder_input_dynamic = self.enc2dec( + decoder_input_static, decoder_input_dynamic = self.enc2dec( encoder_output_static, encoder_output_dynamic, future_feat_dynamic_real, diff --git a/src/gluonts/model/seq2seq/_transform.py b/src/gluonts/model/seq2seq/_transform.py index a36231b2d5..9ab15422cb 100644 --- a/src/gluonts/model/seq2seq/_transform.py +++ b/src/gluonts/model/seq2seq/_transform.py @@ -70,12 +70,19 @@ def __init__( if decoder_series_fields is not None else [self.target_field] ) + + # Fields that are not used at prediction time for the decoder self.prediction_time_decoder_exclude = ( prediction_time_decoder_exclude + [self.target_field] if prediction_time_decoder_exclude is not None else [self.target_field] ) + # Fields that are disabled for the decoder (dummy fields still created) + self.decoder_disabled_fields = list( + set(self.encoder_series_fields) - set(self.decoder_series_fields) + ) + self.is_pad_out = is_pad_out self.start_in = start_input_field @@ -144,19 +151,25 @@ def flatmap_transform( # This is were some of the forking magic happens: # For each of the encoder_len time-steps at which the decoder is applied we slice the # corresponding inputs called decoder_fields to the appropriate dec_len - if ts_field in self.decoder_series_fields: + if ( + ts_field + in self.decoder_series_fields + + self.decoder_disabled_fields + ): forking_dec_field = np.zeros( shape=(self.enc_len, self.dec_len, len(ts)) ) - skip = max(0, self.enc_len - sampling_idx) - # This section takes by far the longest time computationally: - # This scales linearly in self.enc_len and linearly in self.dec_len - for dec_field, idx in zip( - forking_dec_field[skip:], - range(start_idx + 1, start_idx + self.enc_len + 1), - ): - dec_field[:] = ts[:, idx : idx + self.dec_len].T + # in case it's not disabled we copy the actual values + if ts_field not in self.decoder_disabled_fields: + skip = max(0, self.enc_len - sampling_idx) + # This section takes by far the longest time computationally: + # This scales linearly in self.enc_len and linearly in self.dec_len + for dec_field, idx in zip( + forking_dec_field[skip:], + range(start_idx + 1, start_idx + self.enc_len + 1), + ): + dec_field[:] = ts[:, idx : idx + self.dec_len].T out[self._future(ts_field)] = np.squeeze(forking_dec_field) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index 85b3acea9d..a6f8f054e8 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -56,9 +56,14 @@ def test_accuracy( @pytest.mark.parametrize("use_feat_dynamic_real", [True, False]) @pytest.mark.parametrize("add_time_feature", [True, False]) @pytest.mark.parametrize("add_age_feature", [True, False]) +@pytest.mark.parametrize("enable_decoder_dynamic_feature", [True, False]) @pytest.mark.parametrize("hybridize", [True, False]) def test_mqcnn_covariate_smoke_test( - use_feat_dynamic_real, add_time_feature, add_age_feature, hybridize + use_feat_dynamic_real, + add_time_feature, + add_age_feature, + enable_decoder_dynamic_feature, + hybridize, ): hps = { "seed": 42, @@ -70,6 +75,7 @@ def test_mqcnn_covariate_smoke_test( "use_feat_dynamic_real": use_feat_dynamic_real, "add_time_feature": add_time_feature, "add_age_feature": add_age_feature, + "enable_decoder_dynamic_feature": enable_decoder_dynamic_feature, "hybridize": hybridize, } From 18e45bc2d0058a6a7704612df36b89eb1514395c Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Tue, 12 May 2020 19:08:25 +0200 Subject: [PATCH 41/44] Changed default of future dynamic to disabled. --- src/gluonts/model/seq2seq/_forking_estimator.py | 2 +- src/gluonts/model/seq2seq/_mq_dnn_estimator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index b06d1a55a2..1613186e8a 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -130,7 +130,7 @@ def __init__( embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, - enable_decoder_dynamic_feature: bool = True, + enable_decoder_dynamic_feature: bool = False, trainer: Trainer = Trainer(), dtype: DType = np.float32, ) -> None: diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index f8576fc1af..7080f871ea 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -48,7 +48,7 @@ def __init__( embedding_dimension: List[int] = None, add_time_feature: bool = False, add_age_feature: bool = False, - enable_decoder_dynamic_feature: bool = True, + enable_decoder_dynamic_feature: bool = False, seed: Optional[int] = None, decoder_mlp_dim_seq: Optional[List[int]] = None, channels_seq: Optional[List[int]] = None, From c636cd84d904696a0c1b43b2e361ad3fd0fd6792 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Wed, 13 May 2020 14:17:28 +0200 Subject: [PATCH 42/44] Turning user specified arguments into implications. --- .../model/seq2seq/_mq_dnn_estimator.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 7080f871ea..cc011f8786 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -18,6 +18,7 @@ # Third-party imports import numpy as np import mxnet as mx +import logging # First-party imports from gluonts.dataset.common import Dataset, ListDataset @@ -106,6 +107,8 @@ def __init__( f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}" ) + print("Use dynamic real", use_feat_dynamic_real) + if seed: np.random.seed(seed) mx.random.seed(seed) @@ -181,9 +184,12 @@ def train( num_workers = ( num_workers if num_workers is not None - else int(np.ceil(np.sqrt(multiprocessing.cpu_count()))) + else min(4, int(np.ceil(np.sqrt(multiprocessing.cpu_count())))) ) + logger = logging.getLogger(__name__) + logger.info(f"gluonts[multiprocessing]: num_workers={num_workers}") + return super().train( training_data=cached_train_data, validation_data=cached_validation_data, @@ -192,6 +198,39 @@ def train( **kwargs, ) + @classmethod + def from_inputs(cls, train_iter, **params): + # auto_params usually include `use_feat_dynamic_real`, `use_feat_static_cat` and `cardinality` + auto_params = cls.derive_auto_fields(train_iter) + + # user defined arguments become implications + if ( + "use_feat_dynamic_real" in params.keys() + and params["use_feat_dynamic_real"] + and not auto_params["use_feat_dynamic_real"] + ): + logger = logging.getLogger(__name__) + logger.warning( + f"gluonts[from_inputs]: use_feat_dynamic_real set to False since it is not present in the data." + ) + params["use_feat_dynamic_real"] = False + + if ( + "use_feat_static_cat" in params.keys() + and params["use_feat_static_cat"] + and not auto_params["use_feat_static_cat"] + ): + logger = logging.getLogger(__name__) + logger.warning( + f"gluonts[from_inputs]: use_feat_static_cat set to False since it is not present in the data." + ) + params["use_feat_static_cat"] = False + params["cardinality"] = None + + # user specified 'params' will take precedence: + params = {**auto_params, **params} + return cls.from_hyperparameters(**params) + class MQRNNEstimator(ForkingSeq2SeqEstimator): """ From 442edf560df48df29178b219002e3a2738db6dde Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 18 May 2020 21:09:49 +0200 Subject: [PATCH 43/44] Adding documentation for MQCNN parameters, removing non gluonts code. --- src/gluonts/block/encoder.py | 2 +- .../model/seq2seq/_forking_estimator.py | 39 +++-- src/gluonts/model/seq2seq/_forking_network.py | 12 +- .../model/seq2seq/_mq_dnn_estimator.py | 148 +++++++++--------- test/model/seq2seq/test_model.py | 27 ++++ 5 files changed, 134 insertions(+), 94 deletions(-) diff --git a/src/gluonts/block/encoder.py b/src/gluonts/block/encoder.py index f2a4550d85..ceff6b740f 100644 --- a/src/gluonts/block/encoder.py +++ b/src/gluonts/block/encoder.py @@ -238,7 +238,7 @@ def hybrid_forward( class RNNEncoder(Seq2SeqEncoder): """ - Defines RNN encoder that uses covariates and target as input to the RNN if desired. + Defines RNN encoder that uses covariates and target as input to the RNN if desired. Parameters ---------- diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 1613186e8a..936782f37e 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -19,10 +19,7 @@ # First-party imports from gluonts.block.decoder import Seq2SeqDecoder -from gluonts.block.enc2dec import ( - PassThroughEnc2Dec, - FutureFeatIntegratorEnc2Dec, -) +from gluonts.block.enc2dec import FutureFeatIntegratorEnc2Dec from gluonts.block.encoder import Seq2SeqEncoder from gluonts.block.quantile_output import QuantileOutput from gluonts.core.component import validated, DType @@ -89,11 +86,11 @@ class ForkingSeq2SeqEstimator(GluonEstimator): quantile_output quantile output freq - frequency of the time series + frequency of the time series. prediction_length - length of the decoding sequence + length of the decoding sequence. context_length - length of the encoding sequence (prediction_length is used if None) + length of the encoding sequence (default: 4 * prediction_length) use_feat_dynamic_real Whether to use the ``feat_dynamic_real`` field from the data (default: False) use_feat_static_cat: @@ -105,12 +102,18 @@ class ForkingSeq2SeqEstimator(GluonEstimator): Dimension of the embeddings for categorical features (default: [min(50, (cat+1)//2) for cat in cardinality]) add_time_feature - Adds a set of time features. + Adds a set of time features. (default: False) add_age_feature - Adds an age feature. + Adds an age feature. (default: False) The age feature starts with a small value at the start of the time series and grows over time. + enable_decoder_dynamic_feature + Whether the decoder should also be provided with the dynamic features (``age``, ``time`` + and ``feat_dynamic_real`` if enabled respectively). (default: True) + It makes sense to disable this, if you dont have ``feat_dynamic_real`` for the prediction range. trainer trainer (default: Trainer()) + scaling + Whether to automatically scale the target values (default: False) dtype (default: np.float32) """ @@ -128,10 +131,11 @@ def __init__( use_feat_static_cat: bool = False, cardinality: List[int] = None, embedding_dimension: List[int] = None, - add_time_feature: bool = False, - add_age_feature: bool = False, - enable_decoder_dynamic_feature: bool = False, + add_time_feature: bool = True, + add_age_feature: bool = True, + enable_decoder_dynamic_feature: bool = True, trainer: Trainer = Trainer(), + scaling: bool = False, dtype: DType = np.float32, ) -> None: super().__init__(trainer=trainer) @@ -160,7 +164,7 @@ def __init__( self.context_length = ( context_length if context_length is not None - else self.prediction_length + else 4 * self.prediction_length ) self.use_feat_dynamic_real = use_feat_dynamic_real self.use_feat_static_cat = use_feat_static_cat @@ -178,6 +182,7 @@ def __init__( use_feat_dynamic_real or add_age_feature or add_time_feature ) self.enable_decoder_dynamic_feature = enable_decoder_dynamic_feature + self.scaling = scaling self.dtype = dtype def create_transformation(self) -> Transformation: @@ -235,7 +240,11 @@ def create_transformation(self) -> Transformation: if self.use_feat_dynamic_real: # Backwards compatibility: chain.append( - RenameFields({"dynamic_feat": FieldName.FEAT_DYNAMIC_REAL}) + RenameFields( + { + FieldName.FEAT_DYNAMIC_REAL_LEGACY: FieldName.FEAT_DYNAMIC_REAL + } + ) ) dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) @@ -309,6 +318,7 @@ def create_training_network(self) -> ForkingSeq2SeqNetworkBase: context_length=self.context_length, cardinality=self.cardinality, embedding_dimension=self.embedding_dimension, + scaling=self.scaling, dtype=self.dtype, ) @@ -331,6 +341,7 @@ def create_predictor( context_length=self.context_length, cardinality=self.cardinality, embedding_dimension=self.embedding_dimension, + scaling=self.scaling, dtype=self.dtype, ) diff --git a/src/gluonts/model/seq2seq/_forking_network.py b/src/gluonts/model/seq2seq/_forking_network.py index 4bfc1a5821..735431b648 100644 --- a/src/gluonts/model/seq2seq/_forking_network.py +++ b/src/gluonts/model/seq2seq/_forking_network.py @@ -52,6 +52,8 @@ class ForkingSeq2SeqNetworkBase(gluon.HybridBlock): number of values of each categorical feature. embedding_dimension: List[int], dimension of the embeddings for categorical features + scaling + Whether to automatically scale the target values (default: True) dtype (default: np.float32) kwargs: dict @@ -68,6 +70,7 @@ def __init__( context_length: int, cardinality: List[int], embedding_dimension: List[int], + scaling: bool = True, dtype: DType = np.float32, **kwargs, ) -> None: @@ -80,11 +83,10 @@ def __init__( self.context_length = context_length self.cardinality = cardinality self.embedding_dimension = embedding_dimension + self.scaling = scaling self.dtype = dtype - # TODO: implement scaling - scaling = False - if scaling: + if self.scaling: self.scaler = MeanScaler(keepdims=True) else: self.scaler = NOPScaler(keepdims=True) @@ -111,7 +113,7 @@ def get_decoder_network_output( # scale is computed on the context length last units of the past target # scale shape is (batch_size, 1, *target_shape) - _, scale = self.scaler( + scaled_past_target, scale = self.scaler( past_target.slice_axis( axis=1, begin=-self.context_length, end=None ), @@ -137,7 +139,7 @@ def get_decoder_network_output( # arguments: target, static_features, dynamic_features enc_output_static, enc_output_dynamic = self.encoder( - past_target, feat_static_real, past_feat_dynamic_extended + scaled_past_target, feat_static_real, past_feat_dynamic_extended ) # arguments: encoder_output_static, encoder_output_dynamic, future_features diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index cc011f8786..5f1a922485 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -35,6 +35,65 @@ class MQCNNEstimator(ForkingSeq2SeqEstimator): """ An :class:`MQDNNEstimator` with a Convolutional Neural Network (CNN) as an encoder and a multi-quantile MLP as a decoder. Implements the MQ-CNN Forecaster, proposed in [WTN+17]_. + + Parameters + ---------- + freq + Time granularity of the data. + prediction_length + Length of the prediction, also known as 'horizon'. + context_length + Number of time units that condition the predictions, also known as 'lookback period'. + (default: 4 * prediction_length) + use_feat_dynamic_real + Whether to use the ``feat_dynamic_real`` field from the data. (default: False) + Automatically inferred when creating the MQCNNEstimator with the `from_inputs` class method. + use_feat_static_cat: + Whether to use the ``feat_static_cat`` field from the data. (default: False) + Automatically inferred when creating the MQCNNEstimator with the `from_inputs` class method. + cardinality: + Number of values of each categorical feature. + This must be set if ``use_feat_static_cat == True`` (default: None) + Automatically inferred when creating the MQCNNEstimator with the `from_inputs` class method. + embedding_dimension: + Dimension of the embeddings for categorical features. (default: [min(50, (cat+1)//2) for cat in cardinality]) + add_time_feature + Adds a set of time features. (default: False) + add_age_feature + Adds an age feature. (default: False) + The age feature starts with a small value at the start of the time series and grows over time. + enable_decoder_dynamic_feature + Whether the decoder should also be provided with the dynamic features (``age``, ``time`` + and ``feat_dynamic_real`` if enabled respectively). (default: True) + It makes sense to disable this, if you dont have ``feat_dynamic_real`` for the prediction range. + seed + Will set the specified int seed for numpy anc MXNet if specified. (default: None) + decoder_mlp_dim_seq + The dimensionalities of the Multi Layer Perceptron layers of the decoder. + (default: [30]) + channels_seq + The number of channels (i.e. filters or convolutions) for each layer of the HierarchicalCausalConv1DEncoder. + More channels usually correspond to better performance and larger network size. + (default: [30, 30, 30]) + dilation_seq + The dilation of the convolutions in each layer of the HierarchicalCausalConv1DEncoder. + Greater numbers correspond to a greater receptive field of the network, which is usually + better with longer context_length. (Same length as channels_seq) (default: [1, 3, 5]) + kernel_size_seq + The kernel sizes (i.e. window size) of the convolutions in each layer of the HierarchicalCausalConv1DEncoder. + (Same length as channels_seq) (default: [7, 3, 3]) + use_residual + Whether the hierarchical encoder should additionally pass the unaltered + past target to the decoder. (default: True) + quantiles + The list of quantiles that will be optimized for, and predicted by, the model. + Optimizing for more quantiles than are of direct interest to you can result + in improved performance due to a regularizing effect. + (default: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + trainer + The GluonTS trainer to use for training. (default: Trainer()) + scaling + Whether to automatically scale the target values. (default: False) """ @validated() @@ -58,6 +117,7 @@ def __init__( use_residual: bool = True, quantiles: Optional[List[float]] = None, trainer: Trainer = Trainer(), + scaling: bool = False, ) -> None: assert ( @@ -81,16 +141,16 @@ def __init__( ), "Elements of `quantiles` should be >= 0 and <= 1" self.decoder_mlp_dim_seq = ( - decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [20] + decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30] ) self.channels_seq = ( channels_seq if channels_seq is not None else [30, 30, 30] ) self.dilation_seq = ( - dilation_seq if dilation_seq is not None else [1, 3, 9] + dilation_seq if dilation_seq is not None else [1, 3, 5] ) self.kernel_size_seq = ( - kernel_size_seq if kernel_size_seq is not None else [3, 3, 3] + kernel_size_seq if kernel_size_seq is not None else [7, 3, 3] ) self.quantiles = ( quantiles @@ -107,8 +167,6 @@ def __init__( f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}" ) - print("Use dynamic real", use_feat_dynamic_real) - if seed: np.random.seed(seed) mx.random.seed(seed) @@ -149,87 +207,25 @@ def __init__( add_time_feature=add_time_feature, add_age_feature=add_age_feature, trainer=trainer, + scaling=scaling, ) @classmethod def derive_auto_fields(cls, train_iter): stats = calculate_dataset_statistics(train_iter) - return { + auto_fields = { "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0, "use_feat_static_cat": bool(stats.feat_static_cat), "cardinality": [len(cats) for cats in stats.feat_static_cat], } - # FIXME: for now we always want the dataset to be cached and utilize multiprocessing. - # TODO it properly: Enable caching of the dataset in the `_load_datasets` function of the shell, - # and pass `num_workers` from train_env in the `run_train_and_test` method to `run_train`, - # which in turn has to pass it to train(...) - def train( - self, - training_data: Dataset, - validation_data: Optional[Dataset] = None, - num_workers: Optional[int] = None, - num_prefetch: Optional[int] = None, - **kwargs, - ): - cached_train_data = ListDataset( - data_iter=list(training_data), freq=self.freq - ) - cached_validation_data = ( - None - if validation_data is None - else ListDataset(data_iter=list(validation_data), freq=self.freq) - ) - num_workers = ( - num_workers - if num_workers is not None - else min(4, int(np.ceil(np.sqrt(multiprocessing.cpu_count())))) - ) - logger = logging.getLogger(__name__) - logger.info(f"gluonts[multiprocessing]: num_workers={num_workers}") - - return super().train( - training_data=cached_train_data, - validation_data=cached_validation_data, - num_workers=num_workers, - num_prefetch=num_prefetch, - **kwargs, + logger.info( + f"gluonts[from_inputs]: use_feat_dynamic_real set to '{auto_fields['use_feat_dynamic_real']}', and use use_feat_static_cat to '{auto_fields['use_feat_static_cat']}' with cardinality of '{auto_fields['cardinality']}'" ) - @classmethod - def from_inputs(cls, train_iter, **params): - # auto_params usually include `use_feat_dynamic_real`, `use_feat_static_cat` and `cardinality` - auto_params = cls.derive_auto_fields(train_iter) - - # user defined arguments become implications - if ( - "use_feat_dynamic_real" in params.keys() - and params["use_feat_dynamic_real"] - and not auto_params["use_feat_dynamic_real"] - ): - logger = logging.getLogger(__name__) - logger.warning( - f"gluonts[from_inputs]: use_feat_dynamic_real set to False since it is not present in the data." - ) - params["use_feat_dynamic_real"] = False - - if ( - "use_feat_static_cat" in params.keys() - and params["use_feat_static_cat"] - and not auto_params["use_feat_static_cat"] - ): - logger = logging.getLogger(__name__) - logger.warning( - f"gluonts[from_inputs]: use_feat_static_cat set to False since it is not present in the data." - ) - params["use_feat_static_cat"] = False - params["cardinality"] = None - - # user specified 'params' will take precedence: - params = {**auto_params, **params} - return cls.from_hyperparameters(**params) + return auto_fields class MQRNNEstimator(ForkingSeq2SeqEstimator): @@ -247,6 +243,7 @@ def __init__( decoder_mlp_dim_seq: List[int] = None, trainer: Trainer = Trainer(), quantiles: List[float] = None, + scaling: bool = True, ) -> None: assert ( @@ -260,10 +257,12 @@ def __init__( ), "Elements of `quantiles` should be >= 0 and <= 1" self.decoder_mlp_dim_seq = ( - decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [20] + decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30] ) self.quantiles = ( - quantiles if quantiles is not None else [0.1, 0.5, 0.9] + quantiles + if quantiles is not None + else [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] ) # `use_static_feat` and `use_dynamic_feat` always True because network @@ -295,4 +294,5 @@ def __init__( prediction_length=prediction_length, context_length=context_length, trainer=trainer, + scaling=scaling, ) diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index a6f8f054e8..f9e6beee14 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -93,6 +93,33 @@ def test_mqcnn_covariate_smoke_test( assert len(forecasts) == len(dataset_test) +# Test scaling and from inputs +@pytest.mark.parametrize("scaling", [True, False]) +def test_mqcnn_scaling_smoke_test(scaling): + hps = { + "seed": 42, + "freq": "D", + "prediction_length": 3, + "quantiles": [0.5, 0.1], + "epochs": 3, + "num_batches_per_epoch": 3, + "scaling": scaling, + } + + dataset_train, dataset_test = make_dummy_datasets_with_features( + cardinality=[3, 10], + num_feat_dynamic_real=2, + freq=hps["freq"], + prediction_length=hps["prediction_length"], + ) + + estimator = MQCNNEstimator.from_inputs(dataset_train, **hps) + + predictor = estimator.train(dataset_train, num_workers=0) + forecasts = list(predictor.predict(dataset_test)) + assert len(forecasts) == len(dataset_test) + + def test_repr(Estimator, repr_test, hyperparameters): repr_test(Estimator, hyperparameters) From d5699faed3fcba9f6485fa3010e06b27e9d60e35 Mon Sep 17 00:00:00 2001 From: Aaron Spieler Date: Mon, 18 May 2020 21:24:49 +0200 Subject: [PATCH 44/44] Removing backwards compatibility. --- src/gluonts/dataset/field_names.py | 1 - src/gluonts/dataset/stat.py | 2 - .../model/seq2seq/_forking_estimator.py | 8 ---- .../model/seq2seq/_mq_dnn_estimator.py | 4 +- test/model/seq2seq/test_model.py | 37 ------------------- 5 files changed, 3 insertions(+), 49 deletions(-) diff --git a/src/gluonts/dataset/field_names.py b/src/gluonts/dataset/field_names.py index d686c4f26f..0e0a6ff7f8 100644 --- a/src/gluonts/dataset/field_names.py +++ b/src/gluonts/dataset/field_names.py @@ -27,7 +27,6 @@ class FieldName: FEAT_STATIC_REAL = "feat_static_real" FEAT_DYNAMIC_CAT = "feat_dynamic_cat" FEAT_DYNAMIC_REAL = "feat_dynamic_real" - FEAT_DYNAMIC_REAL_LEGACY = "dynamic_feat" FEAT_DYNAMIC = "feat_dynamic" diff --git a/src/gluonts/dataset/stat.py b/src/gluonts/dataset/stat.py index bc94d2604d..cfe8914c98 100644 --- a/src/gluonts/dataset/stat.py +++ b/src/gluonts/dataset/stat.py @@ -300,8 +300,6 @@ def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: feat_dynamic_real = None if FieldName.FEAT_DYNAMIC_REAL in ts: feat_dynamic_real = ts[FieldName.FEAT_DYNAMIC_REAL] - elif FieldName.FEAT_DYNAMIC_REAL_LEGACY in ts: - feat_dynamic_real = ts[FieldName.FEAT_DYNAMIC_REAL_LEGACY] if feat_dynamic_real is None: # feat_dynamic_real not found, check it was the first ts we encounter or diff --git a/src/gluonts/model/seq2seq/_forking_estimator.py b/src/gluonts/model/seq2seq/_forking_estimator.py index 936782f37e..b0dda76e99 100644 --- a/src/gluonts/model/seq2seq/_forking_estimator.py +++ b/src/gluonts/model/seq2seq/_forking_estimator.py @@ -238,14 +238,6 @@ def create_transformation(self) -> Transformation: dynamic_feat_fields.append(FieldName.FEAT_AGE) if self.use_feat_dynamic_real: - # Backwards compatibility: - chain.append( - RenameFields( - { - FieldName.FEAT_DYNAMIC_REAL_LEGACY: FieldName.FEAT_DYNAMIC_REAL - } - ) - ) dynamic_feat_fields.append(FieldName.FEAT_DYNAMIC_REAL) # we need to make sure that there is always some dynamic input diff --git a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py index 5f1a922485..bd31ad9bc6 100644 --- a/src/gluonts/model/seq2seq/_mq_dnn_estimator.py +++ b/src/gluonts/model/seq2seq/_mq_dnn_estimator.py @@ -222,7 +222,9 @@ def derive_auto_fields(cls, train_iter): logger = logging.getLogger(__name__) logger.info( - f"gluonts[from_inputs]: use_feat_dynamic_real set to '{auto_fields['use_feat_dynamic_real']}', and use use_feat_static_cat to '{auto_fields['use_feat_static_cat']}' with cardinality of '{auto_fields['cardinality']}'" + f"gluonts[from_inputs]: use_feat_dynamic_real set to " + f"'{auto_fields['use_feat_dynamic_real']}', and use use_feat_static_cat to " + f"'{auto_fields['use_feat_static_cat']}' with cardinality of '{auto_fields['cardinality']}'" ) return auto_fields diff --git a/test/model/seq2seq/test_model.py b/test/model/seq2seq/test_model.py index f9e6beee14..752abf1436 100644 --- a/test/model/seq2seq/test_model.py +++ b/test/model/seq2seq/test_model.py @@ -126,40 +126,3 @@ def test_repr(Estimator, repr_test, hyperparameters): def test_serialize(Estimator, serialize_test, hyperparameters): serialize_test(Estimator, hyperparameters) - - -def test_backwards_compatibility(): - hps = { - "freq": "D", - "prediction_length": 3, - "quantiles": [0.5, 0.1], - "epochs": 3, - "num_batches_per_epoch": 3, - "use_feat_dynamic_real": True, - "num_workers": 0, - } - - dataset_train, dataset_test = make_dummy_datasets_with_features( - cardinality=[3, 10], - num_feat_dynamic_real=2, - freq=hps["freq"], - prediction_length=hps["prediction_length"], - ) - - for i in range(len(dataset_train)): - dataset_train.list_data[i]["dynamic_feat"] = dataset_train.list_data[ - i - ]["feat_dynamic_real"] - del dataset_train.list_data[i]["feat_dynamic_real"] - - for i in range(len(dataset_test)): - dataset_test.list_data[i]["dynamic_feat"] = dataset_test.list_data[i][ - "feat_dynamic_real" - ] - del dataset_test.list_data[i]["feat_dynamic_real"] - - estimator = MQCNNEstimator.from_inputs(dataset_train, **hps) - - predictor = estimator.train(dataset_train, num_workers=0) - forecasts = list(predictor.predict(dataset_test)) - assert len(forecasts) == len(dataset_test)