diff --git a/docs/getting_started/models.md b/docs/getting_started/models.md index 7516a5217b..fb7db0adc9 100644 --- a/docs/getting_started/models.md +++ b/docs/getting_started/models.md @@ -79,4 +79,4 @@ NPTS | Local | Un [Prophet]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/prophet/_predictor.py [NaiveSeasonal]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/seasonal_naive/_predictor.py [Naive2]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/naive_2/_predictor.py -[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/npts/_predictor.py +[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/npts/_predictor.py diff --git a/docs/tutorials/data_manipulation/pandasdataframes.md.template b/docs/tutorials/data_manipulation/pandasdataframes.md.template index fc52e76a70..05ecfb1aed 100644 --- a/docs/tutorials/data_manipulation/pandasdataframes.md.template +++ b/docs/tutorials/data_manipulation/pandasdataframes.md.template @@ -123,8 +123,8 @@ from gluonts.dataset.pandas import PandasDataset max_end = max(df.groupby("item_id").apply(lambda _df: _df.index[-1])) dfs_dict = {} -for item_id, gdf in df.groupby("item_id"): - new_index = pd.date_range(gdf.index[0], end=max_end, freq="1D") +for item_id, gdf in df_missing_val.groupby("item_id"): + new_index = pd.date_range(gdf.index[0], end=max_end, freq="1H") dfs_dict[item_id] = gdf.reindex(new_index).drop("item_id", axis=1) ds = PandasDataset(dfs_dict, target="target") diff --git a/src/gluonts/mx/prelude.py b/src/gluonts/mx/prelude.py index 81b9b7ed79..e1820dba56 100644 --- a/src/gluonts/mx/prelude.py +++ b/src/gluonts/mx/prelude.py @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. -# flake8: noqa: F401 +# flake8: noqa: F401, F403 from .component import * from .serde import * diff --git a/src/gluonts/nursery/daf/requirements.txt b/src/gluonts/nursery/daf/requirements.txt index e3c5e9424c..9f3fa92724 100644 --- a/src/gluonts/nursery/daf/requirements.txt +++ b/src/gluonts/nursery/daf/requirements.txt @@ -3,5 +3,5 @@ tensorboard==2.3.0 numpy==1.22.0 pandas==1.1.5 scikit-learn==0.23.2 -scipy==1.5.2 +scipy==1.10.0 matplotlib==3.3.2 \ No newline at end of file diff --git a/src/gluonts/nursery/temporal_hierarchical_forecasting/eval/evaluation.py b/src/gluonts/nursery/temporal_hierarchical_forecasting/eval/evaluation.py index 5369e1d2b5..f8812196d1 100644 --- a/src/gluonts/nursery/temporal_hierarchical_forecasting/eval/evaluation.py +++ b/src/gluonts/nursery/temporal_hierarchical_forecasting/eval/evaluation.py @@ -11,6 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. +from itertools import tee from typing import List import numpy as np @@ -87,27 +88,48 @@ def evaluate_forecasts_at_all_levels( evaluator, metrics: List[str] = ["mean_wQuantileLoss"], ): + # Order of forecasts obtained e.g., in case of the hierarchy: [6M, 2M, 1M] + # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...) forecast_at_all_levels_unpacked_it = unpack_forecasts( forecast_at_all_levels_it=forecast_at_all_levels_it, temporal_hierarchy=temporal_hierarchy, target_temporal_hierarchy=temporal_hierarchy, ) - # First get item metrics for all time series for all frequencies; these are per time series metrics. - # Then we aggregate the metrics by slicing according to the hierarchy. - # `metrics_per_ts` is a dataframe where columns contain all item metrics; - # number of rows = num_levels x num_ts, with the row ordering: - # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...) - _, metrics_per_ts = evaluator( - ts_iterator=test_ts_at_all_levels_it, - fcst_iterator=forecast_at_all_levels_unpacked_it, + num_levels = len(temporal_hierarchy.agg_multiples) + + # In one go, we can get item metrics for time series that have the same frequency. + # So we create `num_levels` copies of the iterator and obtain the item metrics + # for each level independently. + forecast_at_all_levels_unpacked_it_set = tee( + forecast_at_all_levels_unpacked_it, + num_levels, ) + test_ts_at_all_levels_it_set = tee(test_ts_at_all_levels_it, num_levels) + + # Since forecasts for all granularities are in the same iterable, + # we need a way to iterate through forecasts skipping some elements. + def skip_iter(it, num_skips: int, offset: int): + for _ in range(offset): + next(it) + for item in it: + for _ in range(num_skips): + next(it, None) # None: in case the `it` is already exhausted. + yield item - num_levels = len(temporal_hierarchy.agg_multiples) metrics_to_return = {} for level in range(num_levels): - agg_metrics_level, _ = evaluator.get_aggregate_metrics( - metrics_per_ts.iloc[level:None:num_levels] + agg_metrics_level, metrics_per_ts_level = evaluator( + ts_iterator=skip_iter( + test_ts_at_all_levels_it_set[level], + num_skips=num_levels - 1, + offset=level, + ), + fcst_iterator=skip_iter( + forecast_at_all_levels_unpacked_it_set[level], + num_skips=num_levels - 1, + offset=level, + ), ) for metric_name in metrics: diff --git a/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/_estimator.py b/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/_estimator.py index 1369107ff5..03bb055f00 100644 --- a/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/_estimator.py +++ b/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/_estimator.py @@ -13,7 +13,7 @@ from copy import deepcopy from functools import partial -from typing import List, Tuple +from typing import List, Tuple, Optional from mxnet.gluon import HybridBlock import numpy as np @@ -27,7 +27,6 @@ TrainDataLoader, ValidationDataLoader, ) -from gluonts.env import env from gluonts.model.predictor import Predictor from gluonts.mx.distribution import GaussianOutput from gluonts.mx.model.estimator import GluonEstimator @@ -42,8 +41,9 @@ SelectFields, SimpleTransformation, Transformation, + MissingValueImputation, + RollingMeanValueImputation, ) - from gluonts.nursery.temporal_hierarchical_forecasting.model.cop_deepar.gluonts_fixes import ( batchify_with_dict, DeepAREstimatorForCOP, @@ -172,6 +172,9 @@ def __init__( return_forecasts_at_all_levels: bool = False, naive_reconciliation: bool = False, dtype: Type = np.float32, + impute_missing_values: bool = False, + imputation_method: Optional[MissingValueImputation] = None, + num_imputation_samples: int = 1, ) -> None: super().__init__(trainer=trainer, dtype=dtype) @@ -203,11 +206,20 @@ def __init__( assert self.base_estimator_type == DeepAREstimatorForCOP - if "distr_output" not in base_estimator_hps: - base_estimator_hps["distr_output"] = GaussianOutput() + base_estimator_hps.setdefault("distr_output", GaussianOutput()) print(f"Distribution output: {base_estimator_hps['distr_output']}") + base_estimator_hps.setdefault( + "impute_missing_values", impute_missing_values + ) + + base_estimator_hps.setdefault("imputation_method", imputation_method) + + base_estimator_hps.setdefault( + "num_imputation_samples", num_imputation_samples + ) + self.estimators = [] for agg_multiple, freq_str in zip( self.temporal_hierarchy.agg_multiples, @@ -224,6 +236,14 @@ def __init__( num_nodes = self.temporal_hierarchy.num_leaves // agg_multiple lags_seq = [lag for lag in lags_seq if lag >= num_nodes] + # adapt window_length if RollingMeanValueImputation is used + if isinstance(imputation_method, RollingMeanValueImputation): + base_estimator_hps_agg[ + "imputation_method" + ] = RollingMeanValueImputation( + window_size=imputation_method.window_size // agg_multiple + ) + # Hack to enforce correct serialization of lags_seq and history length # (only works when set in constructor). if agg_multiple != 1: @@ -352,7 +372,6 @@ def create_predictor( + self.agg_feature_adder, prediction_net=prediction_network, batch_size=self.batch_size, - freq=self.freq, prediction_length=self.prediction_length, ctx=self.trainer.ctx, dtype=self.dtype, diff --git a/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/gluonts_fixes.py b/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/gluonts_fixes.py index baa4939e15..a8ac13bc14 100644 --- a/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/gluonts_fixes.py +++ b/src/gluonts/nursery/temporal_hierarchical_forecasting/model/cop_deepar/gluonts_fixes.py @@ -77,7 +77,6 @@ def __init__( prediction_net: mx.gluon.HybridBlock, batch_size: int, prediction_length: int, - freq: str, ctx: mx.Context, input_transform: Transformation, lead_time: int = 0, @@ -151,7 +150,6 @@ def deserialize( input_transform=transform, prediction_net=prediction_net, batch_size=parameters["batch_size"], - freq=parameters["freq"], prediction_length=parameters["prediction_length"], ctx=parameters["ctx"], dtype=parameters["dtype"], diff --git a/src/gluonts/nursery/temporal_hierarchical_forecasting/train_and_eval_cop_deepar.py b/src/gluonts/nursery/temporal_hierarchical_forecasting/train_and_eval_cop_deepar.py index 1702c14503..aac5607ed4 100644 --- a/src/gluonts/nursery/temporal_hierarchical_forecasting/train_and_eval_cop_deepar.py +++ b/src/gluonts/nursery/temporal_hierarchical_forecasting/train_and_eval_cop_deepar.py @@ -22,6 +22,9 @@ ) +EVALUATE_ALL_LEVELS = True + + def main(): dataset = get_dataset("exchange_rate", regenerate=False) diff --git a/src/gluonts/torch/model/estimator.py b/src/gluonts/torch/model/estimator.py index f0c71771e1..003d88f7fa 100644 --- a/src/gluonts/torch/model/estimator.py +++ b/src/gluonts/torch/model/estimator.py @@ -213,10 +213,15 @@ def train_model( ckpt_path=ckpt_path, ) - logger.info(f"Loading best model from {checkpoint.best_model_path}") - best_model = training_network.load_from_checkpoint( - checkpoint.best_model_path - ) + if checkpoint.best_model_path != "": + logger.info( + f"Loading best model from {checkpoint.best_model_path}" + ) + best_model = training_network.load_from_checkpoint( + checkpoint.best_model_path + ) + else: + best_model = training_network return TrainOutput( transformation=transformation, diff --git a/src/gluonts/torch/model/predictor.py b/src/gluonts/torch/model/predictor.py index aa3ba592c1..fdb5ac3e7d 100644 --- a/src/gluonts/torch/model/predictor.py +++ b/src/gluonts/torch/model/predictor.py @@ -73,12 +73,12 @@ def network(self) -> nn.Module: def predict( self, dataset: Dataset, num_samples: Optional[int] = None ) -> Iterator[Forecast]: - self.input_transform += SelectFields( - self.input_names + self.required_fields, allow_missing=True - ) inference_data_loader = InferenceDataLoader( dataset, - transform=self.input_transform, + transform=self.input_transform + + SelectFields( + self.input_names + self.required_fields, allow_missing=True + ), batch_size=self.batch_size, stack_fn=lambda data: batchify(data, self.device), ) diff --git a/src/gluonts/torch/prelude.py b/src/gluonts/torch/prelude.py index 8a981851d4..a66ac9fd05 100644 --- a/src/gluonts/torch/prelude.py +++ b/src/gluonts/torch/prelude.py @@ -11,7 +11,7 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. -# flake8: noqa: F401 +# flake8: noqa: F401, F403 from .component import * from .model.forecast_generator import *