Skip to content

Commit

Permalink
Merge pull request #73 from Nixtla/PERMBU
Browse files Browse the repository at this point in the history
PERMBU
  • Loading branch information
kdgutier authored Oct 13, 2022
2 parents 2fe973e + 8ed43cc commit bb06f6c
Show file tree
Hide file tree
Showing 12 changed files with 1,010 additions and 48 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[![PyPi](https://img.shields.io/pypi/v/hierarchicalforecast?color=blue)](https://pypi.org/project/hierarchicalforecast/)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE)

**HierarchicalForecast** offers a collection of reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`.
**HierarchicalForecast** offers a collection of reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent predictions through the `MinTrace-normality`, `Bootstrap`, and `PERM-BU` methods.
</div>

## 🎊 Features
Expand All @@ -22,6 +22,10 @@
- `MiddleOut`: It anchors the base predictions in a middle level. The levels above the base predictions use the bottom-up approach, while the levels below use a top-down.
- `MinTrace`: Minimizes the total forecast variance of the space of coherent forecasts, with the Minimum Trace reconciliation.
- `ERM`: Optimizes the reconciliation matrix minimizing an L1 regularized objective.
* Probabilistic coherent methods:
- `MinTrace-normality`: Uses MinTrace variance-covariance closed form matrix under a normality assumption.
- `Bootstrap`: Generates distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.
    - `PERM-BU`: Reconciles independent bottom-level sample predictions by reinjecting multivariate dependencies with estimated rank-permutation copulas, and performing a BottomUp aggregation.

Missing something? Please open an issue here or write us in [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)

Expand Down
12 changes: 12 additions & 0 deletions hierarchicalforecast/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.OptimalCombination.reconcile': ( 'methods.html#optimalcombination.reconcile',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU': ( 'methods.html#permbu',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._nonzero_indexes_by_row': ( 'methods.html#permbu._nonzero_indexes_by_row',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._obtain_ranks': ( 'methods.html#permbu._obtain_ranks',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._permutate_predictions': ( 'methods.html#permbu._permutate_predictions',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._permutate_samples': ( 'methods.html#permbu._permutate_samples',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU.reconcile': ( 'methods.html#permbu.reconcile',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown': ( 'methods.html#topdown',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown.__init__': ( 'methods.html#topdown.__init__',
Expand Down
185 changes: 183 additions & 2 deletions hierarchicalforecast/methods.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'TopDown', 'MiddleOut', 'MinTrace', 'OptimalCombination', 'ERM']
__all__ = ['BottomUp', 'TopDown', 'MiddleOut', 'MinTrace', 'OptimalCombination', 'ERM', 'PERMBU']

# %% ../nbs/methods.ipynb 2
import warnings
Expand All @@ -11,9 +11,12 @@

import numpy as np
from numba import njit

from scipy.stats import norm
from statsmodels.stats.moment_helpers import cov2corr

from sklearn.preprocessing import OneHotEncoder

# %% ../nbs/methods.ipynb 4
def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray,
y_hat: np.ndarray, SP: np.ndarray = None,
Expand Down Expand Up @@ -57,7 +60,7 @@ def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray,
res[f'hi-{lv}'] = res['mean'] + zs * sigmah
return res

# %% ../nbs/methods.ipynb 5
# %% ../nbs/methods.ipynb 6
def bottom_up(S: np.ndarray,
y_hat: np.ndarray,
idx_bottom: List[int],
Expand Down Expand Up @@ -745,3 +748,181 @@ def reconcile(self,
bootstrap=bootstrap, bootstrap_samples=bootstrap_samples)

__call__ = reconcile

# %% ../nbs/methods.ipynb 59
class PERMBU:
    """PERMBU Probabilistic Reconciliation Class.

    The PERM-BU method leverages empirical bottom-level marginal distributions
    with empirical copula functions (describing bottom-level dependencies) to
    generate aggregate-level sample distributions using BottomUp
    reconciliation. The sample reordering technique in the PERM-BU method
    reinjects multivariate dependencies into independent bottom-level samples.

    **References:**<br>
    - [Taieb, Souhaib Ben and Taylor, James W and Hyndman, Rob J. (2017).
    Coherent probabilistic forecasts for hierarchical time series.
    International conference on machine learning ICML.](https://proceedings.mlr.press/v70/taieb17a.html)
    """
    def _obtain_ranks(self, array):
        """ Vector ranks

        Efficiently obtain vector ranks.
        Example `array=[4,2,7,1]` -> `ranks=[2, 1, 3, 0]`.

        **Parameters**<br>
        `array`: np.array, matrix with floats or integers on which the
        ranks will be computed on the second dimension.<br>

        **Returns**<br>
        `ranks`: np.array, matrix with ranks along the second dimension.<br>
        """
        # argsort of argsort yields ranks; done row-wise with a single
        # scatter-assignment per row instead of a second argsort.
        temp = array.argsort(axis=1)
        ranks = np.empty_like(temp)
        a_range = np.arange(temp.shape[1])
        for i_row in range(temp.shape[0]):
            ranks[i_row, temp[i_row, :]] = a_range
        return ranks

    def _permutate_samples(self, samples, permutations):
        """ Permutate Samples

        Applies efficient vectorized permutation on the samples.

        **Parameters**<br>
        `samples`: np.array [series,samples], independent base samples.<br>
        `permutations`: np.array [series,samples], permutation ranks with which
        `samples` dependence will be restored; see `_obtain_ranks`.<br>

        **Returns**<br>
        `permutated_samples`: np.array.<br>
        """
        # Generate auxiliary and flat permutation indexes: offset each row's
        # ranks by its row start so one flat fancy-index permutes all rows.
        n_rows, n_cols = permutations.shape
        aux_row_idx = np.arange(n_rows)[:, None] * n_cols
        aux_row_idx = np.repeat(aux_row_idx, repeats=n_cols, axis=1)
        permutate_idxs = permutations.flatten() + aux_row_idx.flatten()

        # Apply flat permutation indexes and recover original shape
        permutated_samples = samples.flatten()
        permutated_samples = permutated_samples[permutate_idxs]
        permutated_samples = permutated_samples.reshape(n_rows, n_cols)
        return permutated_samples

    def _permutate_predictions(self, prediction_samples, permutations):
        """ Permutate Prediction Samples

        Applies permutations to prediction_samples across the horizon.

        **Parameters**<br>
        `prediction_samples`: np.array [series,horizon,samples], independent
        base prediction samples.<br>
        `permutations`: np.array [series,samples], permutation ranks with which
        `samples` dependence will be restored; see `_obtain_ranks`.
        It can also apply a random permutation.<br>

        **Returns**<br>
        `permutated_prediction_samples`: np.array.<br>
        """
        # Apply the same per-series permutation at every forecast step so the
        # restored dependence structure is consistent throughout the horizon.
        permutated_prediction_samples = prediction_samples.copy()

        _, n_horizon, _ = prediction_samples.shape
        for t in range(n_horizon):
            permutated_prediction_samples[:, t, :] = \
                self._permutate_samples(prediction_samples[:, t, :],
                                        permutations)
        return permutated_prediction_samples

    def _nonzero_indexes_by_row(self, M):
        # Column indexes of the non-zero entries of each row of M.
        return [np.nonzero(M[row, :])[0] for row in range(len(M))]

    def reconcile(self,
                  S: np.ndarray,
                  y_hat_mean: np.ndarray,
                  y_hat_std: np.ndarray,
                  y_insample: np.ndarray,
                  y_hat_insample: np.ndarray,
                  n_samples: int=None,
                  seed: int=0):
        r"""PERMBU Sample Reconciliation Method.

        Applies the PERMBU reconciliation method as defined by Taieb et. al 2017.
        Generates independent base prediction samples, restores their multivariate
        dependence using an estimated copula with sample reordering, and applies
        BottomUp aggregation to the new samples.

        Algorithm:
        1. For all series compute conditional marginal distributions.
        2. Compute residuals $\hat{\epsilon}_{i,t}$ and obtain rank permutations.
        3. Obtain a K-sample from the bottom-level series predictions.
        4. Apply recursively through the hierarchical structure:<br>
            4.1. For a given aggregate series $i$ and its children series:<br>
            4.2. Obtain the children's empirical joint using sample reordering copula.<br>
            4.3. From the children's joint obtain the aggregate series' samples.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat_mean`: Mean forecast values of size (`base`, `horizon`).<br>
        `y_hat_std`: Forecast standard dev. of size (`base`, `horizon`).<br>
        `y_insample`: Insample values of size (`base`, `insample_size`).<br>
        `y_hat_insample`: Insample predictions of size (`base`, `insample_size`).<br>
        `n_samples`: int, number of normal prediction samples generated;
        defaults to `insample_size` when `None`.<br>
        `seed`: int, seed for numpy's global random state.<br>

        **Returns:**<br>
        `rec_samples`: Reconciled samples of size (`base`, `horizon`, `n_samples`)
        using the PERMBU approach.
        """
        # Compute residuals and rank permutations.
        # NOTE(review): the permutations have `insample_size` columns, so this
        # implicitly assumes n_samples == insample_size — confirm intended use.
        residuals = y_insample - y_hat_insample
        rank_permutations = self._obtain_ranks(residuals)

        # Sample h step-ahead base marginal distributions
        if n_samples is None:
            n_samples = y_insample.shape[1]
        # NOTE: mutates numpy's global random state for reproducibility.
        np.random.seed(seed)
        n_series, n_horizon = y_hat_mean.shape

        base_samples = np.array([np.random.normal(
            loc=m, scale=s, size=n_samples) for m, s in \
            zip(y_hat_mean.flatten(), y_hat_std.flatten())])
        base_samples = base_samples.reshape(n_series, n_horizon, n_samples)

        # Initialize PERMBU utility
        rec_samples = base_samples.copy()
        hier_links = np.vstack(self._nonzero_indexes_by_row(S.T))

        # BottomUp hierarchy traversing
        hier_levels = hier_links.shape[1] - 1
        for level_idx in reversed(range(hier_levels)):
            # Obtain aggregation matrix from parent/children links
            children_links = np.unique(hier_links[:, level_idx:level_idx+2],
                                       axis=0)
            children_idxs = np.unique(children_links[:, 1])
            parent_idxs = np.unique(children_links[:, 0])
            # Parent indicator matrix: Agg[p, l] = 1 iff link l belongs to
            # parent_idxs[p]. Equivalent to the first `len(parent_idxs)` rows
            # of `OneHotEncoder(sparse=False).fit_transform(children_links).T`
            # (sorted categories), but avoids scikit-learn's `OneHotEncoder`,
            # whose `sparse` kwarg was renamed `sparse_output` in 1.2 and
            # removed in 1.4 (the original call breaks there).
            # NOTE(review): the einsum below pairs Agg's columns (ordered by
            # sorted (parent, child) links) with rows of `children_idxs`
            # (sorted by child) — confirm these orders coincide for all
            # supported hierarchies.
            Agg = (children_links[:, 0][None, :] == parent_idxs[:, None])
            Agg = Agg.astype(np.float32)

            # Permute children_samples for each prediction step
            children_permutations = rank_permutations[children_idxs, :]
            children_samples = rec_samples[children_idxs, :, :]
            children_samples = self._permutate_predictions(
                prediction_samples=children_samples,
                permutations=children_permutations)

            # Overwrite hier_samples with BottomUp aggregation
            # and randomly shuffle parent predictions after aggregation
            parent_samples = np.einsum('ab,bhs->ahs', Agg, children_samples)
            random_permutation = np.array([
                np.random.permutation(np.arange(n_samples)) \
                for serie in range(len(parent_samples))])
            parent_samples = self._permutate_predictions(
                prediction_samples=parent_samples,
                permutations=random_permutation)

            rec_samples[parent_idxs, :, :] = parent_samples

        return rec_samples

    __call__ = reconcile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,18 @@
"execution_count": null,
"id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1.30M/1.30M [00:00<00:00, 2.22MiB/s]\n",
"INFO:datasetsforecast.utils:Successfully downloaded datasets.zip, 1297279, bytes.\n",
"INFO:datasetsforecast.utils:Decompressing zip file...\n",
"INFO:datasetsforecast.utils:Successfully decompressed data/hierarchical/datasets.zip\n"
]
}
],
"source": [
"Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')\n",
"Y_df['ds'] = pd.to_datetime(Y_df['ds'])"
Expand Down Expand Up @@ -558,9 +569,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/fy/h20z78md68jgtxrgcl3h1y4r0000gn/T/ipykernel_82265/1077664338.py:22: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
"/var/folders/fy/h20z78md68jgtxrgcl3h1y4r0000gn/T/ipykernel_5159/1077664338.py:22: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
" evaluation = evaluation.drop('Overall')\n"
]
}
Expand Down Expand Up @@ -1000,9 +1000,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,9 +794,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit bb06f6c

Please sign in to comment.