Merge branch 'dev' into add_fit_to_gaussian
melopeo authored Aug 9, 2023
2 parents e841367 + a944581 commit 8c0be99
Showing 11 changed files with 80 additions and 33 deletions.
docs/getting_started/models.md (2 changes: 1 addition & 1 deletion)

@@ -79,4 +79,4 @@ NPTS | Local | Un
 [Prophet]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/prophet/_predictor.py
 [NaiveSeasonal]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/seasonal_naive/_predictor.py
 [Naive2]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/naive_2/_predictor.py
-[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/npts/_predictor.py
+[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/npts/_predictor.py
docs/tutorials/data_manipulation/pandasdataframes.md.template (4 changes: 2 additions & 2 deletions)

@@ -123,8 +123,8 @@ from gluonts.dataset.pandas import PandasDataset

 max_end = max(df.groupby("item_id").apply(lambda _df: _df.index[-1]))
 dfs_dict = {}
-for item_id, gdf in df.groupby("item_id"):
-    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1D")
+for item_id, gdf in df_missing_val.groupby("item_id"):
+    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1H")
     dfs_dict[item_id] = gdf.reindex(new_index).drop("item_id", axis=1)

 ds = PandasDataset(dfs_dict, target="target")
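Note: the corrected snippet pads every grouped series with NaNs up to the latest end timestamp across all items before constructing a PandasDataset. A self-contained sketch of the same pattern, with a toy two-item hourly frame (the data and values here are illustrative, not from the tutorial):

# Toy version of the tutorial pattern: align all items to a common end.
import pandas as pd
from gluonts.dataset.pandas import PandasDataset

df_missing_val = pd.DataFrame(
    {
        "target": [1.0, 2.0, 3.0, 10.0, 20.0],
        "item_id": ["A", "A", "A", "B", "B"],
    },
    index=pd.to_datetime(
        [
            "2021-01-01 00:00", "2021-01-01 01:00", "2021-01-01 02:00",
            "2021-01-01 00:00", "2021-01-01 01:00",
        ]
    ),
)

max_end = max(df_missing_val.groupby("item_id").apply(lambda _df: _df.index[-1]))
dfs_dict = {}
for item_id, gdf in df_missing_val.groupby("item_id"):
    # Reindex to hourly steps up to max_end; missing steps become NaN.
    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1H")
    dfs_dict[item_id] = gdf.reindex(new_index).drop("item_id", axis=1)

ds = PandasDataset(dfs_dict, target="target")  # item "B" now ends with a NaN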
src/gluonts/mx/prelude.py (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

-# flake8: noqa: F401
+# flake8: noqa: F401, F403

 from .component import *
 from .serde import *
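(For reference: F401 is flake8's "imported but unused" check, and the newly ignored F403 is "'from module import *' used; unable to detect undefined names", which the prelude's star imports would otherwise trigger.)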
src/gluonts/nursery/daf/requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -3,5 +3,5 @@ tensorboard==2.3.0
 numpy==1.22.0
 pandas==1.1.5
 scikit-learn==0.23.2
-scipy==1.5.2
+scipy==1.10.0
 matplotlib==3.3.2
(file path not shown)

@@ -11,6 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

+from itertools import tee
 from typing import List

 import numpy as np

@@ -87,27 +88,48 @@ def evaluate_forecasts_at_all_levels(
     evaluator,
     metrics: List[str] = ["mean_wQuantileLoss"],
 ):
     # Order of forecasts obtained e.g., in case of the hierarchy: [6M, 2M, 1M]
     # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...)
     forecast_at_all_levels_unpacked_it = unpack_forecasts(
         forecast_at_all_levels_it=forecast_at_all_levels_it,
-        temporal_hierarchy=temporal_hierarchy,
+        target_temporal_hierarchy=temporal_hierarchy,
     )

-    # First get item metrics for all time series for all frequencies; these are per time series metrics.
-    # Then we aggregate the metrics by slicing according to the hierarchy.
-    # `metrics_per_ts` is a dataframe where columns contain all item metrics;
-    # number of rows = num_levels x num_ts, with the row ordering:
-    # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...)
-    _, metrics_per_ts = evaluator(
-        ts_iterator=test_ts_at_all_levels_it,
-        fcst_iterator=forecast_at_all_levels_unpacked_it,
+    num_levels = len(temporal_hierarchy.agg_multiples)
+
+    # In one go, we can get item metrics for time series that have the same frequency.
+    # So we create `num_levels` copies of the iterator and obtain the item metrics
+    # for each level independently.
+    forecast_at_all_levels_unpacked_it_set = tee(
+        forecast_at_all_levels_unpacked_it,
+        num_levels,
     )
+    test_ts_at_all_levels_it_set = tee(test_ts_at_all_levels_it, num_levels)
+
+    # Since forecasts for all granularities are in the same iterable,
+    # we need a way to iterate through forecasts skipping some elements.
+    def skip_iter(it, num_skips: int, offset: int):
+        for _ in range(offset):
+            next(it)
+        for item in it:
+            for _ in range(num_skips):
+                next(it, None)  # None: in case the `it` is already exhausted.
+            yield item

-    num_levels = len(temporal_hierarchy.agg_multiples)
     metrics_to_return = {}
     for level in range(num_levels):
-        agg_metrics_level, _ = evaluator.get_aggregate_metrics(
-            metrics_per_ts.iloc[level:None:num_levels]
+        agg_metrics_level, metrics_per_ts_level = evaluator(
+            ts_iterator=skip_iter(
+                test_ts_at_all_levels_it_set[level],
+                num_skips=num_levels - 1,
+                offset=level,
+            ),
+            fcst_iterator=skip_iter(
+                forecast_at_all_levels_unpacked_it_set[level],
+                num_skips=num_levels - 1,
+                offset=level,
+            ),
         )

         for metric_name in metrics:
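Note on the rewritten evaluation above: forecasts for all aggregation levels arrive in one interleaved iterator, so the new code tee-copies it once per level and uses skip_iter to pick out every num_levels-th element at a given offset. A standalone sketch with made-up item names (plain strings, not GluonTS objects) showing the de-interleaving:

from itertools import tee

def skip_iter(it, num_skips: int, offset: int):
    # Same helper as in the diff: yield every (num_skips + 1)-th element,
    # starting at position `offset`.
    for _ in range(offset):
        next(it)
    for item in it:
        for _ in range(num_skips):
            next(it, None)  # tolerate exhaustion at the tail
        yield item

interleaved = ["ts1_6M", "ts1_2M", "ts1_1M", "ts2_6M", "ts2_2M", "ts2_1M"]
num_levels = 3
copies = tee(iter(interleaved), num_levels)
for level in range(num_levels):
    print(list(skip_iter(copies[level], num_skips=num_levels - 1, offset=level)))
# ['ts1_6M', 'ts2_6M']
# ['ts1_2M', 'ts2_2M']
# ['ts1_1M', 'ts2_1M']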
(file path not shown)

@@ -13,7 +13,7 @@

 from copy import deepcopy
 from functools import partial
-from typing import List, Tuple
+from typing import List, Tuple, Optional

 from mxnet.gluon import HybridBlock
 import numpy as np

@@ -27,7 +27,6 @@
     TrainDataLoader,
     ValidationDataLoader,
 )
-from gluonts.env import env
 from gluonts.model.predictor import Predictor
 from gluonts.mx.distribution import GaussianOutput
 from gluonts.mx.model.estimator import GluonEstimator

@@ -42,8 +41,9 @@
     SelectFields,
     SimpleTransformation,
     Transformation,
+    MissingValueImputation,
+    RollingMeanValueImputation,
 )
-
 from gluonts.nursery.temporal_hierarchical_forecasting.model.cop_deepar.gluonts_fixes import (
     batchify_with_dict,
     DeepAREstimatorForCOP,

@@ -172,6 +172,9 @@ def __init__(
         return_forecasts_at_all_levels: bool = False,
         naive_reconciliation: bool = False,
         dtype: Type = np.float32,
+        impute_missing_values: bool = False,
+        imputation_method: Optional[MissingValueImputation] = None,
+        num_imputation_samples: int = 1,
     ) -> None:
         super().__init__(trainer=trainer, dtype=dtype)

@@ -203,11 +206,20 @@

         assert self.base_estimator_type == DeepAREstimatorForCOP

-        if "distr_output" not in base_estimator_hps:
-            base_estimator_hps["distr_output"] = GaussianOutput()
+        base_estimator_hps.setdefault("distr_output", GaussianOutput())

         print(f"Distribution output: {base_estimator_hps['distr_output']}")

+        base_estimator_hps.setdefault(
+            "impute_missing_values", impute_missing_values
+        )
+
+        base_estimator_hps.setdefault("imputation_method", imputation_method)
+
+        base_estimator_hps.setdefault(
+            "num_imputation_samples", num_imputation_samples
+        )
+
         self.estimators = []
         for agg_multiple, freq_str in zip(
             self.temporal_hierarchy.agg_multiples,

@@ -224,6 +236,14 @@
             num_nodes = self.temporal_hierarchy.num_leaves // agg_multiple
             lags_seq = [lag for lag in lags_seq if lag >= num_nodes]

+            # adapt window_length if RollingMeanValueImputation is used
+            if isinstance(imputation_method, RollingMeanValueImputation):
+                base_estimator_hps_agg[
+                    "imputation_method"
+                ] = RollingMeanValueImputation(
+                    window_size=imputation_method.window_size // agg_multiple
+                )
+
             # Hack to enforce correct serialization of lags_seq and history length
             # (only works when set in constructor).
             if agg_multiple != 1:

@@ -352,7 +372,6 @@ def create_predictor(
             + self.agg_feature_adder,
             prediction_net=prediction_network,
             batch_size=self.batch_size,
-            freq=self.freq,
             prediction_length=self.prediction_length,
             ctx=self.trainer.ctx,
             dtype=self.dtype,
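Two details in the estimator changes above are easy to miss. dict.setdefault only fills a key that is absent, so any value the caller already placed in base_estimator_hps takes precedence over the new constructor arguments. And when one level aggregates agg_multiple base steps, a rolling-mean imputation window of window_size base steps corresponds to window_size // agg_multiple aggregated steps, hence the per-level rescaling. A toy illustration (plain dicts and ints, no GluonTS types):

# setdefault fills only missing keys, so explicit hyperparameters win.
base_estimator_hps = {"impute_missing_values": True}
base_estimator_hps.setdefault("impute_missing_values", False)  # no effect
base_estimator_hps.setdefault("num_imputation_samples", 1)     # key added
print(base_estimator_hps)
# {'impute_missing_values': True, 'num_imputation_samples': 1}

# Window rescaling: a 24-step window at the base (say, hourly) granularity
# spans the same time as a 4-step window after 6-fold aggregation.
window_size, agg_multiple = 24, 6
print(window_size // agg_multiple)  # 4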
(file path not shown)

@@ -77,7 +77,6 @@ def __init__(
         prediction_net: mx.gluon.HybridBlock,
         batch_size: int,
         prediction_length: int,
-        freq: str,
         ctx: mx.Context,
         input_transform: Transformation,
         lead_time: int = 0,

@@ -151,7 +150,6 @@ def deserialize(
             input_transform=transform,
             prediction_net=prediction_net,
             batch_size=parameters["batch_size"],
-            freq=parameters["freq"],
             prediction_length=parameters["prediction_length"],
             ctx=parameters["ctx"],
             dtype=parameters["dtype"],
(file path not shown)

@@ -22,6 +22,9 @@
 )


+EVALUATE_ALL_LEVELS = True
+
+
 def main():
     dataset = get_dataset("exchange_rate", regenerate=False)

src/gluonts/torch/model/estimator.py (13 changes: 9 additions & 4 deletions)

@@ -213,10 +213,15 @@ def train_model(
            ckpt_path=ckpt_path,
        )

-        logger.info(f"Loading best model from {checkpoint.best_model_path}")
-        best_model = training_network.load_from_checkpoint(
-            checkpoint.best_model_path
-        )
+        if checkpoint.best_model_path != "":
+            logger.info(
+                f"Loading best model from {checkpoint.best_model_path}"
+            )
+            best_model = training_network.load_from_checkpoint(
+                checkpoint.best_model_path
+            )
+        else:
+            best_model = training_network

        return TrainOutput(
            transformation=transformation,
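The new guard matters because PyTorch Lightning initializes ModelCheckpoint.best_model_path to the empty string and only sets it once a checkpoint is actually written; if training ends without a save, the old code would call load_from_checkpoint(""). A minimal sketch of the fallback logic (a hypothetical helper, not the GluonTS code itself):

def resolve_best_model(checkpoint, training_network):
    # best_model_path stays "" until ModelCheckpoint has saved at least once.
    if checkpoint.best_model_path != "":
        return training_network.load_from_checkpoint(checkpoint.best_model_path)
    return training_network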
src/gluonts/torch/model/predictor.py (8 changes: 4 additions & 4 deletions)

@@ -73,12 +73,12 @@ def network(self) -> nn.Module:
     def predict(
         self, dataset: Dataset, num_samples: Optional[int] = None
     ) -> Iterator[Forecast]:
-        self.input_transform += SelectFields(
-            self.input_names + self.required_fields, allow_missing=True
-        )
         inference_data_loader = InferenceDataLoader(
             dataset,
-            transform=self.input_transform,
+            transform=self.input_transform
+            + SelectFields(
+                self.input_names + self.required_fields, allow_missing=True
+            ),
             batch_size=self.batch_size,
             stack_fn=lambda data: batchify(data, self.device),
         )
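The predict() change above is more than stylistic: self.input_transform += SelectFields(...) rebinds the predictor's own attribute, so every call to predict() would append one more SelectFields to the stored chain, while the fixed version composes the transform locally and leaves the predictor unchanged between calls. A minimal sketch of the accumulation bug, using a stand-in chain class (not GluonTS's Transformation):

class Chain:
    def __init__(self, steps):
        self.steps = steps

    def __add__(self, other):
        # Like transformation chaining: returns a new, longer chain.
        return Chain(self.steps + [other])

class Predictor:
    def __init__(self):
        self.input_transform = Chain(["base_transform"])

    def predict_old(self):
        # Removed code path: rebinds the attribute, so it grows per call.
        self.input_transform += "select_fields"

    def predict_new(self):
        # Fixed code path: composes locally; the attribute is untouched.
        _ = self.input_transform + "select_fields"

p = Predictor()
p.predict_old(); p.predict_old()
print(p.input_transform.steps)
# ['base_transform', 'select_fields', 'select_fields']

q = Predictor()
q.predict_new(); q.predict_new()
print(q.input_transform.steps)
# ['base_transform']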
src/gluonts/torch/prelude.py (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

-# flake8: noqa: F401
+# flake8: noqa: F401, F403

 from .component import *
 from .model.forecast_generator import *
