Skip to content

Commit

Permalink
Merge pull request #73 from Nixtla/PERMBU
Browse files Browse the repository at this point in the history
PERMBU
  • Loading branch information
kdgutier authored Oct 13, 2022
2 parents 2fe973e + 8ed43cc commit bb06f6c
Show file tree
Hide file tree
Showing 12 changed files with 1,010 additions and 48 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[![PyPi](https://img.shields.io/pypi/v/hierarchicalforecast?color=blue)](https://pypi.org/project/hierarchicalforecast/)
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Nixtla/hierarchicalforecast/blob/main/LICENSE)

**HierarchicalForecast** offers a collection of reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`.
**HierarchicalForecast** offers a collection of reconciliation methods, including `BottomUp`, `TopDown`, `MiddleOut`, `MinTrace` and `ERM`, as well as probabilistic coherent predictions through the `MinTrace-normality`, `Bootstrap`, and `PERM-BU` methods.
</div>

## 🎊 Features
Expand All @@ -22,6 +22,10 @@
- `MiddleOut`: It anchors the base predictions in a middle level. The levels above the base predictions use the bottom-up approach, while the levels below use a top-down.
- `MinTrace`: Minimizes the total forecast variance of the space of coherent forecasts, with the Minimum Trace reconciliation.
- `ERM`: Optimizes the reconciliation matrix minimizing an L1 regularized objective.
* Probabilistic coherent methods:
- `MinTrace-normality`: Uses MinTrace variance-covariance closed form matrix under a normality assumption.
- `Bootstrap`: Generates distribution of hierarchically reconciled predictions using Gamakumara's bootstrap approach.
    - `PERM-BU`: Reconciles independent bottom-level sample predictions by reinjecting multivariate dependencies with estimated rank-permutation copulas, and performing a BottomUp aggregation.

Missing something? Please open an issue here or write us in [![Slack](https://img.shields.io/badge/Slack-4A154B?&logo=slack&logoColor=white)](https://join.slack.com/t/nixtlaworkspace/shared_invite/zt-135dssye9-fWTzMpv2WBthq8NK0Yvu6A)

Expand Down
12 changes: 12 additions & 0 deletions hierarchicalforecast/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.OptimalCombination.reconcile': ( 'methods.html#optimalcombination.reconcile',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU': ( 'methods.html#permbu',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._nonzero_indexes_by_row': ( 'methods.html#permbu._nonzero_indexes_by_row',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._obtain_ranks': ( 'methods.html#permbu._obtain_ranks',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._permutate_predictions': ( 'methods.html#permbu._permutate_predictions',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU._permutate_samples': ( 'methods.html#permbu._permutate_samples',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.PERMBU.reconcile': ( 'methods.html#permbu.reconcile',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown': ( 'methods.html#topdown',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.TopDown.__init__': ( 'methods.html#topdown.__init__',
Expand Down
185 changes: 183 additions & 2 deletions hierarchicalforecast/methods.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'TopDown', 'MiddleOut', 'MinTrace', 'OptimalCombination', 'ERM']
__all__ = ['BottomUp', 'TopDown', 'MiddleOut', 'MinTrace', 'OptimalCombination', 'ERM', 'PERMBU']

# %% ../nbs/methods.ipynb 2
import warnings
Expand All @@ -11,9 +11,12 @@

import numpy as np
from numba import njit

from scipy.stats import norm
from statsmodels.stats.moment_helpers import cov2corr

from sklearn.preprocessing import OneHotEncoder

# %% ../nbs/methods.ipynb 4
def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray,
y_hat: np.ndarray, SP: np.ndarray = None,
Expand Down Expand Up @@ -57,7 +60,7 @@ def _reconcile(S: np.ndarray, P: np.ndarray, W: np.ndarray,
res[f'hi-{lv}'] = res['mean'] + zs * sigmah
return res

# %% ../nbs/methods.ipynb 5
# %% ../nbs/methods.ipynb 6
def bottom_up(S: np.ndarray,
y_hat: np.ndarray,
idx_bottom: List[int],
Expand Down Expand Up @@ -745,3 +748,181 @@ def reconcile(self,
bootstrap=bootstrap, bootstrap_samples=bootstrap_samples)

__call__ = reconcile

# %% ../nbs/methods.ipynb 59
class PERMBU:
    """PERMBU Probabilistic Reconciliation Class.

    The PERM-BU method leverages empirical bottom-level marginal distributions
    with empirical copula functions (describing bottom-level dependencies) to
    generate aggregate-level sample distributions using BottomUp
    reconciliation. The sample reordering technique in the PERM-BU method
    reinjects multivariate dependencies into independent bottom-level samples.

    **References:**<br>
    - [Taieb, Souhaib Ben and Taylor, James W and Hyndman, Rob J. (2017).
    Coherent probabilistic forecasts for hierarchical time series.
    International conference on machine learning ICML.](https://proceedings.mlr.press/v70/taieb17a.html)
    """
    def _obtain_ranks(self, array):
        """ Vector ranks

        Efficiently obtain vector ranks.
        Example `array=[4,2,7,1]` -> `ranks=[2, 1, 3, 0]`.

        **Parameters**<br>
        `array`: np.array, matrix with floats or integers on which the
        ranks will be computed on the second dimension.<br>

        **Returns**<br>
        `ranks`: np.array, matrix with ranks along the second dimension.<br>
        """
        # argsort of argsort yields ranks; done row-wise with a single
        # scatter-assignment per row instead of a second argsort.
        temp = array.argsort(axis=1)
        ranks = np.empty_like(temp)
        a_range = np.arange(temp.shape[1])
        for i_row in range(temp.shape[0]):
            ranks[i_row, temp[i_row, :]] = a_range
        return ranks

    def _permutate_samples(self, samples, permutations):
        """ Permutate Samples

        Applies efficient vectorized permutation on the samples.

        **Parameters**<br>
        `samples`: np.array [series,samples], independent base samples.<br>
        `permutations`: np.array [series,samples], permutation ranks with which
        `samples` dependence will be restored; see `_obtain_ranks`.<br>

        **Returns**<br>
        `permutated_samples`: np.array.<br>
        """
        # Generate auxiliary and flat permutation indexes: offset each row's
        # ranks by its row start so one flat fancy-index permutes all rows.
        n_rows, n_cols = permutations.shape
        aux_row_idx = np.arange(n_rows)[:, None] * n_cols
        aux_row_idx = np.repeat(aux_row_idx, repeats=n_cols, axis=1)
        permutate_idxs = permutations.flatten() + aux_row_idx.flatten()

        # Apply flat permutation indexes and recover original shape
        permutated_samples = samples.flatten()
        permutated_samples = permutated_samples[permutate_idxs]
        permutated_samples = permutated_samples.reshape(n_rows, n_cols)
        return permutated_samples

    def _permutate_predictions(self, prediction_samples, permutations):
        """ Permutate Prediction Samples

        Applies permutations to prediction_samples across the horizon.

        **Parameters**<br>
        `prediction_samples`: np.array [series,horizon,samples], independent
        base prediction samples.<br>
        `permutations`: np.array [series,samples], permutation ranks with which
        `samples` dependence will be restored; see `_obtain_ranks`.
        It can also apply a random permutation.<br>

        **Returns**<br>
        `permutated_prediction_samples`: np.array.<br>
        """
        # Apply the same per-series permutation at every forecast step so the
        # restored dependence structure is consistent throughout the horizon.
        permutated_prediction_samples = prediction_samples.copy()

        _, n_horizon, _ = prediction_samples.shape
        for t in range(n_horizon):
            permutated_prediction_samples[:, t, :] = \
                self._permutate_samples(prediction_samples[:, t, :],
                                        permutations)
        return permutated_prediction_samples

    def _nonzero_indexes_by_row(self, M):
        # Column indexes of the non-zero entries of each row of M.
        return [np.nonzero(M[row, :])[0] for row in range(len(M))]

    def reconcile(self,
                  S: np.ndarray,
                  y_hat_mean: np.ndarray,
                  y_hat_std: np.ndarray,
                  y_insample: np.ndarray,
                  y_hat_insample: np.ndarray,
                  n_samples: int=None,
                  seed: int=0):
        r"""PERMBU Sample Reconciliation Method.

        Applies the PERMBU reconciliation method as defined by Taieb et. al 2017.
        Generates independent base prediction samples, restores their multivariate
        dependence using an estimated copula with sample reordering, and applies
        BottomUp aggregation to the new samples.

        Algorithm:
        1. For all series compute conditional marginal distributions.
        2. Compute residuals $\hat{\epsilon}_{i,t}$ and obtain rank permutations.
        3. Obtain a K-sample from the bottom-level series predictions.
        4. Apply recursively through the hierarchical structure:<br>
            4.1. For a given aggregate series $i$ and its children series:<br>
            4.2. Obtain the children's empirical joint using sample reordering copula.<br>
            4.3. From the children's joint obtain the aggregate series' samples.

        **Parameters:**<br>
        `S`: Summing matrix of size (`base`, `bottom`).<br>
        `y_hat_mean`: Mean forecast values of size (`base`, `horizon`).<br>
        `y_hat_std`: Forecast standard dev. of size (`base`, `horizon`).<br>
        `y_insample`: Insample values of size (`base`, `insample_size`).<br>
        `y_hat_insample`: Insample predictions of size (`base`, `insample_size`).<br>
        `n_samples`: int, number of normal prediction samples generated;
        defaults to `insample_size` when `None`.<br>
        `seed`: int, seed for numpy's global random state.<br>

        **Returns:**<br>
        `rec_samples`: Reconciled samples of size (`base`, `horizon`, `n_samples`)
        using the PERMBU approach.
        """
        # Compute residuals and rank permutations.
        # NOTE(review): the permutations have `insample_size` columns, so this
        # implicitly assumes n_samples == insample_size — confirm intended use.
        residuals = y_insample - y_hat_insample
        rank_permutations = self._obtain_ranks(residuals)

        # Sample h step-ahead base marginal distributions
        if n_samples is None:
            n_samples = y_insample.shape[1]
        # NOTE: mutates numpy's global random state for reproducibility.
        np.random.seed(seed)
        n_series, n_horizon = y_hat_mean.shape

        base_samples = np.array([np.random.normal(
            loc=m, scale=s, size=n_samples) for m, s in \
            zip(y_hat_mean.flatten(), y_hat_std.flatten())])
        base_samples = base_samples.reshape(n_series, n_horizon, n_samples)

        # Initialize PERMBU utility
        rec_samples = base_samples.copy()
        hier_links = np.vstack(self._nonzero_indexes_by_row(S.T))

        # BottomUp hierarchy traversing
        hier_levels = hier_links.shape[1] - 1
        for level_idx in reversed(range(hier_levels)):
            # Obtain aggregation matrix from parent/children links
            children_links = np.unique(hier_links[:, level_idx:level_idx+2],
                                       axis=0)
            children_idxs = np.unique(children_links[:, 1])
            parent_idxs = np.unique(children_links[:, 0])
            # Parent indicator matrix: Agg[p, l] = 1 iff link l belongs to
            # parent_idxs[p]. Equivalent to the first `len(parent_idxs)` rows
            # of `OneHotEncoder(sparse=False).fit_transform(children_links).T`
            # (sorted categories), but avoids scikit-learn's `OneHotEncoder`,
            # whose `sparse` kwarg was renamed `sparse_output` in 1.2 and
            # removed in 1.4 (the original call breaks there).
            # NOTE(review): the einsum below pairs Agg's columns (ordered by
            # sorted (parent, child) links) with rows of `children_idxs`
            # (sorted by child) — confirm these orders coincide for all
            # supported hierarchies.
            Agg = (children_links[:, 0][None, :] == parent_idxs[:, None])
            Agg = Agg.astype(np.float32)

            # Permute children_samples for each prediction step
            children_permutations = rank_permutations[children_idxs, :]
            children_samples = rec_samples[children_idxs, :, :]
            children_samples = self._permutate_predictions(
                prediction_samples=children_samples,
                permutations=children_permutations)

            # Overwrite hier_samples with BottomUp aggregation
            # and randomly shuffle parent predictions after aggregation
            parent_samples = np.einsum('ab,bhs->ahs', Agg, children_samples)
            random_permutation = np.array([
                np.random.permutation(np.arange(n_samples)) \
                for serie in range(len(parent_samples))])
            parent_samples = self._permutate_predictions(
                prediction_samples=parent_samples,
                permutations=random_permutation)

            rec_samples[parent_idxs, :, :] = parent_samples

        return rec_samples

    __call__ = reconcile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,18 @@
"execution_count": null,
"id": "c18a4300-5b8f-45b5-92ce-e52f8c4dab20",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1.30M/1.30M [00:00<00:00, 2.22MiB/s]\n",
"INFO:datasetsforecast.utils:Successfully downloaded datasets.zip, 1297279, bytes.\n",
"INFO:datasetsforecast.utils:Decompressing zip file...\n",
"INFO:datasetsforecast.utils:Successfully decompressed data/hierarchical/datasets.zip\n"
]
}
],
"source": [
"Y_df, S, tags = HierarchicalData.load('./data', 'TourismSmall')\n",
"Y_df['ds'] = pd.to_datetime(Y_df['ds'])"
Expand Down Expand Up @@ -558,9 +569,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/fy/h20z78md68jgtxrgcl3h1y4r0000gn/T/ipykernel_82265/1077664338.py:22: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
"/var/folders/fy/h20z78md68jgtxrgcl3h1y4r0000gn/T/ipykernel_5159/1077664338.py:22: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.\n",
" evaluation = evaluation.drop('Overall')\n"
]
}
Expand Down Expand Up @@ -1000,9 +1000,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,9 +794,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "hierarchicalforecast",
"language": "python",
"name": "python3"
"name": "hierarchicalforecast"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit bb06f6c

Please sign in to comment.