Nixtla · AzulGarza · Jul 2, 2022 · Jul 1, 2022 · Jul 1, 2022 · Jul 1, 2022
diff --git a/hierarchicalforecast/_nbdev.py b/hierarchicalforecast/_nbdev.py
@@ -6,6 +6,7 @@
          "HierarchicalEvaluation": "core.ipynb",
          "bottom_up": "methods.ipynb",
          "BottomUp": "methods.ipynb",
+         "is_strictly_hierarchical": "methods.ipynb",
          "top_down": "methods.ipynb",
          "TopDown": "methods.ipynb",
          "crossprod": "methods.ipynb",

diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py
@@ -25,7 +25,8 @@ class HierarchicalReconciliation:
     def __init__(self, reconcilers: List[Callable]):
         self.reconcilers = reconcilers
 
-    def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):
+    def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame,
+                  tags: Dict[str, np.ndarray]):
         """Reconcile base forecasts.
 
             Parameters
@@ -49,7 +50,8 @@ def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):
         common_vals = dict(
             y = Y_df.pivot(columns='ds', values='y').loc[uids].values,
             S = S_.values,
-            idx_bottom = [S_.index.get_loc(col) for col in S.columns]
+            idx_bottom = S_.index.get_indexer(S.columns),
+            levels={key: S_.index.get_indexer(val) for key, val in tags.items()}
         )
         fcsts = Y_h.copy()
         for reconcile_fn in self.reconcilers:

diff --git a/hierarchicalforecast/methods.py b/hierarchicalforecast/methods.py
@@ -1,9 +1,10 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/methods.ipynb (unless otherwise specified).
 
-__all__ = ['bottom_up', 'BottomUp', 'top_down', 'TopDown', 'crossprod', 'min_trace', 'MinTrace', 'erm', 'ERM']
+__all__ = ['bottom_up', 'BottomUp', 'is_strictly_hierarchical', 'top_down', 'TopDown', 'crossprod', 'min_trace',
+           'MinTrace', 'erm', 'ERM']
 
 # Cell
-from typing import List
+from typing import Dict, List
 
 import numpy as np
 from statsmodels.stats.moment_helpers import cov2corr
@@ -37,18 +38,83 @@ def reconcile(self,
 
     __call__ = reconcile
 
+# Cell
+def is_strictly_hierarchical(S: np.ndarray,
+                             levels: Dict[str, np.ndarray]):
+    # main idea:
+    # if S represents a strictly hierarchical structure
+    # the number of paths before the bottom level
+    # should be equal to the number of nodes
+    # of the previuos level
+    levels_ = dict(sorted(levels.items(), key=lambda x: len(x[1])))
+    # removing bottom level
+    levels_.popitem()
+    # making S categorical
+    hiers = [np.argmax(S[idx], axis=0) + 1 for _, idx in levels_.items()]
+    hiers = np.vstack(hiers)
+    paths = np.unique(hiers, axis=1).shape[1]
+    nodes = levels_.popitem()[1].size
+    return paths == nodes
+
+# Cell
+def _get_nodes(S: np.ndarray, levels: Dict[str, np.ndarray]):
+    childs = {}
+    level_names = list(levels.keys())
+    nodes = {}
+    for i_level, level in enumerate(level_names[:-1]):
+        parent = levels[level]
+        child = np.zeros_like(S)
+        idx_child = levels[level_names[i_level+1]]
+        child[idx_child] = S[idx_child]
+        nodes_level = {}
+        for idx_parent_node in parent:
+            parent_node = S[idx_parent_node]
+            idx_node = child * parent_node.astype(bool)
+            idx_node, = np.where(idx_node.sum(axis=1) > 0)
+            nodes_level[idx_parent_node] = idx_node
+        nodes[level] = nodes_level
+    return nodes
+
+# Cell
+def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,
+                                levels: Dict[str, np.ndarray],
+                                nodes: Dict[str, Dict[int, np.ndarray]],
+                                idx_top: int):
+    reconciled = np.zeros_like(y_hat)
+    reconciled[idx_top] = y_hat[idx_top]
+    level_names = list(levels.keys())
+    for i_level, level in enumerate(level_names[:-1]):
+        nodes_level = nodes[level]
+        for idx_parent, idx_childs in nodes_level.items():
+            fcst_parent = reconciled[idx_parent]
+            childs_sum = y_hat[idx_childs].sum()
+            for idx_child in idx_childs:
+                reconciled[idx_child] = y_hat[idx_child] * fcst_parent / childs_sum
+    return reconciled
+
 # Cell
 def top_down(S: np.ndarray,
              y_hat: np.ndarray,
              y: np.ndarray,
-             idx_bottom: List[int],
+             levels: Dict[str, np.ndarray],
              method: str):
+    if not is_strictly_hierarchical(S, levels):
+        raise ValueError('Top down reconciliation requires strictly hierarchical structures.')
+
     n_hiers, n_bottom = S.shape
     idx_top = int(S.sum(axis=1).argmax())
-    #add strictly hierarchical assert
+    levels_ = dict(sorted(levels.items(), key=lambda x: len(x[1])))
+    idx_bottom = levels_[list(levels_)[-1]]
 
     if method == 'forecast_proportions':
-        raise NotImplementedError(f'Method {method} not implemented yet')
+        nodes = _get_nodes(S=S, levels=levels)
+        reconciled = [_reconcile_fcst_proportions(S=S, y_hat=y_hat_[:, None],
+                                                  levels=levels_,
+                                                  nodes=nodes,
+                                                  idx_top=idx_top) \
+                      for y_hat_ in y_hat.T]
+        reconciled = np.hstack(reconciled)
+        return reconciled
     else:
         y_top = y[idx_top]
         y_btm = y[idx_bottom]
@@ -73,9 +139,9 @@ def reconcile(self,
                   S: np.ndarray,
                   y_hat: np.ndarray,
                   y: np.ndarray,
-                  idx_bottom: List[int]):
+                  levels: Dict[str, np.ndarray],):
         return top_down(S=S, y_hat=y_hat, y=y,
-                        idx_bottom=idx_bottom,
+                        levels=levels,
                         method=self.method)
 
     __call__ = reconcile

diff --git a/nbs/core.ipynb b/nbs/core.ipynb
@@ -40,7 +40,7 @@
    "outputs": [],
    "source": [
     "#hide\n",
-    "from fastcore.test import test_close"
+    "from fastcore.test import test_close, test_fail"
    ]
   },
   {
@@ -71,7 +71,8 @@
     "    def __init__(self, reconcilers: List[Callable]):\n",
     "        self.reconcilers = reconcilers\n",
     "        \n",
-    "    def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):\n",
+    "    def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame,\n",
+    "                  tags: Dict[str, np.ndarray]):\n",
     "        \"\"\"Reconcile base forecasts.\n",
     "        \n",
     "            Parameters\n",
@@ -95,7 +96,8 @@
     "        common_vals = dict(\n",
     "            y = Y_df.pivot(columns='ds', values='y').loc[uids].values,\n",
     "            S = S_.values,\n",
-    "            idx_bottom = [S_.index.get_loc(col) for col in S.columns]\n",
+    "            idx_bottom = S_.index.get_indexer(S.columns),\n",
+    "            levels={key: S_.index.get_indexer(val) for key, val in tags.items()}\n",
     "        )\n",
     "        fcsts = Y_h.copy()\n",
     "        for reconcile_fn in self.reconcilers:\n",
@@ -144,6 +146,8 @@
     "df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n",
     "df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)\n",
     "df.insert(0, 'Country', 'Australia')\n",
+    "\n",
+    "# non strictly hierarchical structure\n",
     "hiers_grouped = [\n",
     "    ['Country'],\n",
     "    ['Country', 'State'], \n",
@@ -152,7 +156,16 @@
     "    ['Country', 'State', 'Purpose'], \n",
     "    ['Country', 'State', 'Region', 'Purpose']\n",
     "]\n",
-    "hier_df, S, tags = hierarchize(df, hiers_grouped)"
+    "# strictly hierarchical structure\n",
+    "hiers_strictly = [\n",
+    "    ['Country'],\n",
+    "    ['Country', 'State'], \n",
+    "    ['Country', 'State', 'Region'], \n",
+    "]\n",
+    "\n",
+    "# getting df\n",
+    "hier_grouped_df, S_grouped, tags_grouped = hierarchize(df, hiers_grouped)\n",
+    "hier_strict_df, S_strict, tags_strict = hierarchize(df, hiers_strictly)"
    ]
   },
   {
@@ -162,11 +175,47 @@
    "outputs": [],
    "source": [
     "#hide\n",
-    "hier_df['y_model'] = hier_df['y']\n",
+    "hier_grouped_df['y_model'] = hier_grouped_df['y']\n",
     "# we should be able to recover y using the methods\n",
-    "hier_df_h = hier_df.groupby('unique_id').tail(12)\n",
-    "ds_h = hier_df_h['ds'].unique()\n",
-    "hier_df = hier_df.query('~(ds in @ds_h)')"
+    "hier_grouped_df_h = hier_grouped_df.groupby('unique_id').tail(12)\n",
+    "ds_h = hier_grouped_df_h['ds'].unique()\n",
+    "hier_grouped_df = hier_grouped_df.query('~(ds in @ds_h)')\n",
+    "\n",
+    "#hierachical reconciliation\n",
+    "hrec = HierarchicalReconciliation(reconcilers=[\n",
+    "    #these methods should reconstruct the original y\n",
+    "    BottomUp(),\n",
+    "    MinTrace(method='ols'),\n",
+    "    MinTrace(method='wls_struct'),\n",
+    "    MinTrace(method='wls_var'),\n",
+    "    MinTrace(method='mint_shrink'),\n",
+    "    # ERM recovers but needs bigger eps\n",
+    "    ERM(method='exact'),\n",
+    "])\n",
+    "reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n",
+    "for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
+    "    if 'ERM' in model:\n",
+    "        eps = 3\n",
+    "    else:\n",
+    "        eps = 1e-5\n",
+    "    test_close(reconciled['y'], reconciled[model], eps=eps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#hide\n",
+    "# top down should break\n",
+    "# with non strictly hierarchical structures\n",
+    "hrec = HierarchicalReconciliation([TopDown(method='average_proportions')])\n",
+    "test_fail(\n",
+    "    hrec.reconcile,\n",
+    "    contains='requires strictly hierarchical structures',\n",
+    "    args=(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n",
+    ")"
    ]
   },
   {
@@ -176,17 +225,52 @@
    "outputs": [],
    "source": [
     "#hide\n",
+    "# methods should work with\n",
+    "# srtictly hierarchical structures\n",
+    "#hide\n",
+    "hier_strict_df['y_model'] = hier_strict_df['y']\n",
+    "# we should be able to recover y using the methods\n",
+    "hier_strict_df_h = hier_strict_df.groupby('unique_id').tail(12)\n",
+    "ds_h = hier_strict_df_h['ds'].unique()\n",
+    "hier_strict_df = hier_strict_df.query('~(ds in @ds_h)')\n",
+    "\n",
+    "# hierarchical reconciliation\n",
     "hrec = HierarchicalReconciliation(reconcilers=[\n",
     "    #these methods should reconstruct the original y\n",
     "    BottomUp(),\n",
     "    MinTrace(method='ols'),\n",
     "    MinTrace(method='wls_struct'),\n",
     "    MinTrace(method='wls_var'),\n",
-    "    MinTrace(method='mint_shrink')\n",
+    "    MinTrace(method='mint_shrink'),\n",
+    "    # top down doesnt recover the original y\n",
+    "    # but it should recover the total level\n",
+    "    TopDown(method='forecast_proportions'),\n",
+    "    TopDown(method='average_proportions'),\n",
+    "    TopDown(method='proportion_averages'),\n",
+    "    # ERM recovers but needs bigger eps\n",
+    "    ERM(method='exact'),\n",
     "])\n",
-    "reconciled = hrec.reconcile(hier_df_h, hier_df, S)\n",
+    "reconciled = hrec.reconcile(hier_strict_df_h, hier_strict_df, S_strict, tags_strict)\n",
     "for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
-    "    test_close(reconciled['y'], reconciled[model])"
+    "    if 'ERM' in model:\n",
+    "        eps = 3\n",
+    "    else:\n",
+    "        eps = 1e-5\n",
+    "    if 'TopDown' in model:\n",
+    "        if 'forecast_proportions' in model:\n",
+    "            test_close(reconciled['y'], reconciled[model], eps)\n",
+    "        else:\n",
+    "            # top down doesnt recover the original y\n",
+    "            test_fail(\n",
+    "                test_close,\n",
+    "                args=(reconciled['y'], reconciled[model], eps),\n",
+    "            )\n",
+    "        # but it should recover the total level\n",
+    "        total_tag = tags_strict['Country']\n",
+    "        test_close(reconciled['y'].loc[total_tag], \n",
+    "                   reconciled[model].loc[total_tag], 1e-2)\n",
+    "    else:\n",
+    "        test_close(reconciled['y'], reconciled[model], eps)"
    ]
   },
   {
@@ -200,7 +284,7 @@
     "#even if their signature includes\n",
     "#that argument\n",
     "hrec = HierarchicalReconciliation([MinTrace(method='ols')])\n",
-    "reconciled = hrec.reconcile(hier_df_h, hier_df.drop(columns=['y_model']), S)\n",
+    "reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df.drop(columns=['y_model']), S_grouped, tags_grouped)\n",
     "for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
     "    test_close(reconciled['y'], reconciled[model])"
    ]
@@ -212,7 +296,7 @@
    "outputs": [],
    "source": [
     "#hide\n",
-    "reconciled.loc[tags['Country/State']]"
+    "reconciled.loc[tags_grouped['Country/State']]"
    ]
   },
   {
@@ -292,7 +376,7 @@
     "evaluator = HierarchicalEvaluation([mse, rmse])\n",
     "evaluator.evaluate(Y_h=reconciled.drop(columns='y'), \n",
     "                   Y_test=reconciled[['ds', 'y']], \n",
-    "                   tags=tags,\n",
+    "                   tags=tags_grouped,\n",
     "                   benchmark='y_model')"
    ]
   }