Merge branch 'dev' into add_fit_to_gaussian
melopeo authored Aug 9, 2023
2 parents e841367 + a944581 commit 8c0be99
Showing 11 changed files with 80 additions and 33 deletions.
docs/getting_started/models.md (2 changes: 1 addition & 1 deletion)

@@ -79,4 +79,4 @@ NPTS | Local | Un
 [Prophet]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/prophet/_predictor.py
 [NaiveSeasonal]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/seasonal_naive/_predictor.py
 [Naive2]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/naive_2/_predictor.py
-[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/ext/npts/_predictor.py
+[NPTS]: https://github.com/awslabs/gluonts/blob/dev/src/gluonts/model/npts/_predictor.py
docs/tutorials/data_manipulation/pandasdataframes.md.template (4 changes: 2 additions & 2 deletions)

@@ -123,8 +123,8 @@ from gluonts.dataset.pandas import PandasDataset

 max_end = max(df.groupby("item_id").apply(lambda _df: _df.index[-1]))
 dfs_dict = {}
-for item_id, gdf in df.groupby("item_id"):
-    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1D")
+for item_id, gdf in df_missing_val.groupby("item_id"):
+    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1H")
     dfs_dict[item_id] = gdf.reindex(new_index).drop("item_id", axis=1)

 ds = PandasDataset(dfs_dict, target="target")
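Note: the corrected snippet pads every grouped series with NaNs up to the latest end timestamp across all items before constructing a PandasDataset. A self-contained sketch of the same pattern, with a toy two-item hourly frame (the data and values here are illustrative, not from the tutorial):

# Toy version of the tutorial pattern: align all items to a common end.
import pandas as pd
from gluonts.dataset.pandas import PandasDataset

df_missing_val = pd.DataFrame(
    {
        "target": [1.0, 2.0, 3.0, 10.0, 20.0],
        "item_id": ["A", "A", "A", "B", "B"],
    },
    index=pd.to_datetime(
        [
            "2021-01-01 00:00", "2021-01-01 01:00", "2021-01-01 02:00",
            "2021-01-01 00:00", "2021-01-01 01:00",
        ]
    ),
)

max_end = max(df_missing_val.groupby("item_id").apply(lambda _df: _df.index[-1]))
dfs_dict = {}
for item_id, gdf in df_missing_val.groupby("item_id"):
    # Reindex to hourly steps up to max_end; missing steps become NaN.
    new_index = pd.date_range(gdf.index[0], end=max_end, freq="1H")
    dfs_dict[item_id] = gdf.reindex(new_index).drop("item_id", axis=1)

ds = PandasDataset(dfs_dict, target="target")  # item "B" now ends with a NaN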
src/gluonts/mx/prelude.py (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

-# flake8: noqa: F401
+# flake8: noqa: F401, F403

 from .component import *
 from .serde import *
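(For reference: F401 is flake8's "imported but unused" check, and the newly ignored F403 is "'from module import *' used; unable to detect undefined names", which the prelude's star imports would otherwise trigger.)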
src/gluonts/nursery/daf/requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -3,5 +3,5 @@ tensorboard==2.3.0
 numpy==1.22.0
 pandas==1.1.5
 scikit-learn==0.23.2
-scipy==1.5.2
+scipy==1.10.0
 matplotlib==3.3.2
(file path not shown)

@@ -11,6 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

+from itertools import tee
 from typing import List

 import numpy as np

@@ -87,27 +88,48 @@ def evaluate_forecasts_at_all_levels(
     evaluator,
     metrics: List[str] = ["mean_wQuantileLoss"],
 ):
     # Order of forecasts obtained e.g., in case of the hierarchy: [6M, 2M, 1M]
     # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...)
     forecast_at_all_levels_unpacked_it = unpack_forecasts(
         forecast_at_all_levels_it=forecast_at_all_levels_it,
-        temporal_hierarchy=temporal_hierarchy,
+        target_temporal_hierarchy=temporal_hierarchy,
     )

-    # First get item metrics for all time series for all frequencies; these are per time series metrics.
-    # Then we aggregate the metrics by slicing according to the hierarchy.
-    # `metrics_per_ts` is a dataframe where columns contain all item metrics;
-    # number of rows = num_levels x num_ts, with the row ordering:
-    # (ts_1_6M, ts_1_2M, ts_1_1M, ts_2_6M, ts_2_2M, ts_2_1M, ...)
-    _, metrics_per_ts = evaluator(
-        ts_iterator=test_ts_at_all_levels_it,
-        fcst_iterator=forecast_at_all_levels_unpacked_it,
+    num_levels = len(temporal_hierarchy.agg_multiples)
+
+    # In one go, we can get item metrics for time series that have the same frequency.
+    # So we create `num_levels` copies of the iterator and obtain the item metrics
+    # for each level independently.
+    forecast_at_all_levels_unpacked_it_set = tee(
+        forecast_at_all_levels_unpacked_it,
+        num_levels,
     )
+    test_ts_at_all_levels_it_set = tee(test_ts_at_all_levels_it, num_levels)
+
+    # Since forecasts for all granularities are in the same iterable,
+    # we need a way to iterate through forecasts skipping some elements.
+    def skip_iter(it, num_skips: int, offset: int):
+        for _ in range(offset):
+            next(it)
+        for item in it:
+            for _ in range(num_skips):
+                next(it, None)  # None: in case the `it` is already exhausted.
+            yield item

-    num_levels = len(temporal_hierarchy.agg_multiples)
     metrics_to_return = {}
     for level in range(num_levels):
-        agg_metrics_level, _ = evaluator.get_aggregate_metrics(
-            metrics_per_ts.iloc[level:None:num_levels]
+        agg_metrics_level, metrics_per_ts_level = evaluator(
+            ts_iterator=skip_iter(
+                test_ts_at_all_levels_it_set[level],
+                num_skips=num_levels - 1,
+                offset=level,
+            ),
+            fcst_iterator=skip_iter(
+                forecast_at_all_levels_unpacked_it_set[level],
+                num_skips=num_levels - 1,
+                offset=level,
+            ),
         )

         for metric_name in metrics:
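Note on the rewritten evaluation above: forecasts for all aggregation levels arrive in one interleaved iterator, so the new code tee-copies it once per level and uses skip_iter to pick out every num_levels-th element at a given offset. A standalone sketch with made-up item names (plain strings, not GluonTS objects) showing the de-interleaving:

from itertools import tee

def skip_iter(it, num_skips: int, offset: int):
    # Same helper as in the diff: yield every (num_skips + 1)-th element,
    # starting at position `offset`.
    for _ in range(offset):
        next(it)
    for item in it:
        for _ in range(num_skips):
            next(it, None)  # tolerate exhaustion at the tail
        yield item

interleaved = ["ts1_6M", "ts1_2M", "ts1_1M", "ts2_6M", "ts2_2M", "ts2_1M"]
num_levels = 3
copies = tee(iter(interleaved), num_levels)
for level in range(num_levels):
    print(list(skip_iter(copies[level], num_skips=num_levels - 1, offset=level)))
# ['ts1_6M', 'ts2_6M']
# ['ts1_2M', 'ts2_2M']
# ['ts1_1M', 'ts2_1M']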
(file path not shown)

@@ -13,7 +13,7 @@

 from copy import deepcopy
 from functools import partial
-from typing import List, Tuple
+from typing import List, Tuple, Optional

 from mxnet.gluon import HybridBlock
 import numpy as np

@@ -27,7 +27,6 @@
     TrainDataLoader,
     ValidationDataLoader,
 )
-from gluonts.env import env
 from gluonts.model.predictor import Predictor
 from gluonts.mx.distribution import GaussianOutput
 from gluonts.mx.model.estimator import GluonEstimator

@@ -42,8 +41,9 @@
     SelectFields,
     SimpleTransformation,
     Transformation,
+    MissingValueImputation,
+    RollingMeanValueImputation,
 )
-
 from gluonts.nursery.temporal_hierarchical_forecasting.model.cop_deepar.gluonts_fixes import (
     batchify_with_dict,
     DeepAREstimatorForCOP,

@@ -172,6 +172,9 @@ def __init__(
         return_forecasts_at_all_levels: bool = False,
         naive_reconciliation: bool = False,
         dtype: Type = np.float32,
+        impute_missing_values: bool = False,
+        imputation_method: Optional[MissingValueImputation] = None,
+        num_imputation_samples: int = 1,
     ) -> None:
         super().__init__(trainer=trainer, dtype=dtype)

@@ -203,11 +206,20 @@

         assert self.base_estimator_type == DeepAREstimatorForCOP

-        if "distr_output" not in base_estimator_hps:
-            base_estimator_hps["distr_output"] = GaussianOutput()
+        base_estimator_hps.setdefault("distr_output", GaussianOutput())

         print(f"Distribution output: {base_estimator_hps['distr_output']}")

+        base_estimator_hps.setdefault(
+            "impute_missing_values", impute_missing_values
+        )
+
+        base_estimator_hps.setdefault("imputation_method", imputation_method)
+
+        base_estimator_hps.setdefault(
+            "num_imputation_samples", num_imputation_samples
+        )
+
         self.estimators = []
         for agg_multiple, freq_str in zip(
             self.temporal_hierarchy.agg_multiples,

@@ -224,6 +236,14 @@
             num_nodes = self.temporal_hierarchy.num_leaves // agg_multiple
             lags_seq = [lag for lag in lags_seq if lag >= num_nodes]

+            # adapt window_length if RollingMeanValueImputation is used
+            if isinstance(imputation_method, RollingMeanValueImputation):
+                base_estimator_hps_agg[
+                    "imputation_method"
+                ] = RollingMeanValueImputation(
+                    window_size=imputation_method.window_size // agg_multiple
+                )
+
             # Hack to enforce correct serialization of lags_seq and history length
             # (only works when set in constructor).
             if agg_multiple != 1:

@@ -352,7 +372,6 @@ def create_predictor(
             + self.agg_feature_adder,
             prediction_net=prediction_network,
             batch_size=self.batch_size,
-            freq=self.freq,
             prediction_length=self.prediction_length,
             ctx=self.trainer.ctx,
             dtype=self.dtype,
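Two details in the estimator changes above are easy to miss. dict.setdefault only fills a key that is absent, so any value the caller already placed in base_estimator_hps takes precedence over the new constructor arguments. And when one level aggregates agg_multiple base steps, a rolling-mean imputation window of window_size base steps corresponds to window_size // agg_multiple aggregated steps, hence the per-level rescaling. A toy illustration (plain dicts and ints, no GluonTS types):

# setdefault fills only missing keys, so explicit hyperparameters win.
base_estimator_hps = {"impute_missing_values": True}
base_estimator_hps.setdefault("impute_missing_values", False)  # no effect
base_estimator_hps.setdefault("num_imputation_samples", 1)     # key added
print(base_estimator_hps)
# {'impute_missing_values': True, 'num_imputation_samples': 1}

# Window rescaling: a 24-step window at the base (say, hourly) granularity
# spans the same time as a 4-step window after 6-fold aggregation.
window_size, agg_multiple = 24, 6
print(window_size // agg_multiple)  # 4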
(file path not shown)

@@ -77,7 +77,6 @@ def __init__(
         prediction_net: mx.gluon.HybridBlock,
         batch_size: int,
         prediction_length: int,
-        freq: str,
         ctx: mx.Context,
         input_transform: Transformation,
         lead_time: int = 0,

@@ -151,7 +150,6 @@ def deserialize(
             input_transform=transform,
             prediction_net=prediction_net,
             batch_size=parameters["batch_size"],
-            freq=parameters["freq"],
             prediction_length=parameters["prediction_length"],
             ctx=parameters["ctx"],
             dtype=parameters["dtype"],
(file path not shown)

@@ -22,6 +22,9 @@
 )


+EVALUATE_ALL_LEVELS = True
+
+
 def main():
     dataset = get_dataset("exchange_rate", regenerate=False)

src/gluonts/torch/model/estimator.py (13 changes: 9 additions & 4 deletions)

@@ -213,10 +213,15 @@ def train_model(
            ckpt_path=ckpt_path,
        )

-        logger.info(f"Loading best model from {checkpoint.best_model_path}")
-        best_model = training_network.load_from_checkpoint(
-            checkpoint.best_model_path
-        )
+        if checkpoint.best_model_path != "":
+            logger.info(
+                f"Loading best model from {checkpoint.best_model_path}"
+            )
+            best_model = training_network.load_from_checkpoint(
+                checkpoint.best_model_path
+            )
+        else:
+            best_model = training_network

        return TrainOutput(
            transformation=transformation,
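The new guard matters because PyTorch Lightning initializes ModelCheckpoint.best_model_path to the empty string and only sets it once a checkpoint is actually written; if training ends without a save, the old code would call load_from_checkpoint(""). A minimal sketch of the fallback logic (a hypothetical helper, not the GluonTS code itself):

def resolve_best_model(checkpoint, training_network):
    # best_model_path stays "" until ModelCheckpoint has saved at least once.
    if checkpoint.best_model_path != "":
        return training_network.load_from_checkpoint(checkpoint.best_model_path)
    return training_network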
src/gluonts/torch/model/predictor.py (8 changes: 4 additions & 4 deletions)

@@ -73,12 +73,12 @@ def network(self) -> nn.Module:
     def predict(
         self, dataset: Dataset, num_samples: Optional[int] = None
     ) -> Iterator[Forecast]:
-        self.input_transform += SelectFields(
-            self.input_names + self.required_fields, allow_missing=True
-        )
         inference_data_loader = InferenceDataLoader(
             dataset,
-            transform=self.input_transform,
+            transform=self.input_transform
+            + SelectFields(
+                self.input_names + self.required_fields, allow_missing=True
+            ),
             batch_size=self.batch_size,
             stack_fn=lambda data: batchify(data, self.device),
         )
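The predict() change above is more than stylistic: self.input_transform += SelectFields(...) rebinds the predictor's own attribute, so every call to predict() would append one more SelectFields to the stored chain, while the fixed version composes the transform locally and leaves the predictor unchanged between calls. A minimal sketch of the accumulation bug, using a stand-in chain class (not GluonTS's Transformation):

class Chain:
    def __init__(self, steps):
        self.steps = steps

    def __add__(self, other):
        # Like transformation chaining: returns a new, longer chain.
        return Chain(self.steps + [other])

class Predictor:
    def __init__(self):
        self.input_transform = Chain(["base_transform"])

    def predict_old(self):
        # Removed code path: rebinds the attribute, so it grows per call.
        self.input_transform += "select_fields"

    def predict_new(self):
        # Fixed code path: composes locally; the attribute is untouched.
        _ = self.input_transform + "select_fields"

p = Predictor()
p.predict_old(); p.predict_old()
print(p.input_transform.steps)
# ['base_transform', 'select_fields', 'select_fields']

q = Predictor()
q.predict_new(); q.predict_new()
print(q.input_transform.steps)
# ['base_transform']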
src/gluonts/torch/prelude.py (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.

-# flake8: noqa: F401
+# flake8: noqa: F401, F403

 from .component import *
 from .model.forecast_generator import *
