Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX/FEAT] Add strictly hierarchical test for top down methods #9

Merged
merged 7 commits into from
Jul 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hierarchicalforecast/_nbdev.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"HierarchicalEvaluation": "core.ipynb",
"bottom_up": "methods.ipynb",
"BottomUp": "methods.ipynb",
"is_strictly_hierarchical": "methods.ipynb",
"top_down": "methods.ipynb",
"TopDown": "methods.ipynb",
"crossprod": "methods.ipynb",
Expand Down
6 changes: 4 additions & 2 deletions hierarchicalforecast/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ class HierarchicalReconciliation:
def __init__(self, reconcilers: List[Callable]):
self.reconcilers = reconcilers

def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):
def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame,
tags: Dict[str, np.ndarray]):
"""Reconcile base forecasts.

Parameters
Expand All @@ -49,7 +50,8 @@ def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):
common_vals = dict(
y = Y_df.pivot(columns='ds', values='y').loc[uids].values,
S = S_.values,
idx_bottom = [S_.index.get_loc(col) for col in S.columns]
idx_bottom = S_.index.get_indexer(S.columns),
levels={key: S_.index.get_indexer(val) for key, val in tags.items()}
)
fcsts = Y_h.copy()
for reconcile_fn in self.reconcilers:
Expand Down
80 changes: 73 additions & 7 deletions hierarchicalforecast/methods.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/methods.ipynb (unless otherwise specified).

__all__ = ['bottom_up', 'BottomUp', 'top_down', 'TopDown', 'crossprod', 'min_trace', 'MinTrace', 'erm', 'ERM']
__all__ = ['bottom_up', 'BottomUp', 'is_strictly_hierarchical', 'top_down', 'TopDown', 'crossprod', 'min_trace',
'MinTrace', 'erm', 'ERM']

# Cell
from typing import List
from typing import Dict, List

import numpy as np
from statsmodels.stats.moment_helpers import cov2corr
Expand Down Expand Up @@ -37,18 +38,83 @@ def reconcile(self,

__call__ = reconcile

# Cell
def is_strictly_hierarchical(S: np.ndarray,
levels: Dict[str, np.ndarray]):
# main idea:
# if S represents a strictly hierarchical structure
# the number of paths before the bottom level
# should be equal to the number of nodes
# of the previuos level
levels_ = dict(sorted(levels.items(), key=lambda x: len(x[1])))
# removing bottom level
levels_.popitem()
# making S categorical
hiers = [np.argmax(S[idx], axis=0) + 1 for _, idx in levels_.items()]
hiers = np.vstack(hiers)
paths = np.unique(hiers, axis=1).shape[1]
nodes = levels_.popitem()[1].size
return paths == nodes

# Cell
def _get_nodes(S: np.ndarray, levels: Dict[str, np.ndarray]):
childs = {}
level_names = list(levels.keys())
nodes = {}
for i_level, level in enumerate(level_names[:-1]):
parent = levels[level]
child = np.zeros_like(S)
idx_child = levels[level_names[i_level+1]]
child[idx_child] = S[idx_child]
nodes_level = {}
for idx_parent_node in parent:
parent_node = S[idx_parent_node]
idx_node = child * parent_node.astype(bool)
idx_node, = np.where(idx_node.sum(axis=1) > 0)
nodes_level[idx_parent_node] = idx_node
nodes[level] = nodes_level
return nodes

# Cell
def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,
levels: Dict[str, np.ndarray],
nodes: Dict[str, Dict[int, np.ndarray]],
idx_top: int):
reconciled = np.zeros_like(y_hat)
reconciled[idx_top] = y_hat[idx_top]
level_names = list(levels.keys())
for i_level, level in enumerate(level_names[:-1]):
nodes_level = nodes[level]
for idx_parent, idx_childs in nodes_level.items():
fcst_parent = reconciled[idx_parent]
childs_sum = y_hat[idx_childs].sum()
for idx_child in idx_childs:
reconciled[idx_child] = y_hat[idx_child] * fcst_parent / childs_sum
return reconciled

# Cell
def top_down(S: np.ndarray,
y_hat: np.ndarray,
y: np.ndarray,
idx_bottom: List[int],
levels: Dict[str, np.ndarray],
method: str):
if not is_strictly_hierarchical(S, levels):
raise ValueError('Top down reconciliation requires strictly hierarchical structures.')

n_hiers, n_bottom = S.shape
idx_top = int(S.sum(axis=1).argmax())
#add strictly hierarchical assert
levels_ = dict(sorted(levels.items(), key=lambda x: len(x[1])))
idx_bottom = levels_[list(levels_)[-1]]

if method == 'forecast_proportions':
raise NotImplementedError(f'Method {method} not implemented yet')
nodes = _get_nodes(S=S, levels=levels)
reconciled = [_reconcile_fcst_proportions(S=S, y_hat=y_hat_[:, None],
levels=levels_,
nodes=nodes,
idx_top=idx_top) \
for y_hat_ in y_hat.T]
reconciled = np.hstack(reconciled)
return reconciled
else:
y_top = y[idx_top]
y_btm = y[idx_bottom]
Expand All @@ -73,9 +139,9 @@ def reconcile(self,
S: np.ndarray,
y_hat: np.ndarray,
y: np.ndarray,
idx_bottom: List[int]):
levels: Dict[str, np.ndarray],):
return top_down(S=S, y_hat=y_hat, y=y,
idx_bottom=idx_bottom,
levels=levels,
method=self.method)

__call__ = reconcile
Expand Down
112 changes: 98 additions & 14 deletions nbs/core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"outputs": [],
"source": [
"#hide\n",
"from fastcore.test import test_close"
"from fastcore.test import test_close, test_fail"
]
},
{
Expand Down Expand Up @@ -71,7 +71,8 @@
" def __init__(self, reconcilers: List[Callable]):\n",
" self.reconcilers = reconcilers\n",
" \n",
" def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame):\n",
" def reconcile(self, Y_h: pd.DataFrame, Y_df: pd.DataFrame, S: pd.DataFrame,\n",
" tags: Dict[str, np.ndarray]):\n",
" \"\"\"Reconcile base forecasts.\n",
" \n",
" Parameters\n",
Expand All @@ -95,7 +96,8 @@
" common_vals = dict(\n",
" y = Y_df.pivot(columns='ds', values='y').loc[uids].values,\n",
" S = S_.values,\n",
" idx_bottom = [S_.index.get_loc(col) for col in S.columns]\n",
" idx_bottom = S_.index.get_indexer(S.columns),\n",
" levels={key: S_.index.get_indexer(val) for key, val in tags.items()}\n",
" )\n",
" fcsts = Y_h.copy()\n",
" for reconcile_fn in self.reconcilers:\n",
Expand Down Expand Up @@ -144,6 +146,8 @@
"df = pd.read_csv('https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/tourism.csv')\n",
"df = df.rename({'Trips': 'y', 'Quarter': 'ds'}, axis=1)\n",
"df.insert(0, 'Country', 'Australia')\n",
"\n",
"# non strictly hierarchical structure\n",
"hiers_grouped = [\n",
" ['Country'],\n",
" ['Country', 'State'], \n",
Expand All @@ -152,7 +156,16 @@
" ['Country', 'State', 'Purpose'], \n",
" ['Country', 'State', 'Region', 'Purpose']\n",
"]\n",
"hier_df, S, tags = hierarchize(df, hiers_grouped)"
"# strictly hierarchical structure\n",
"hiers_strictly = [\n",
" ['Country'],\n",
" ['Country', 'State'], \n",
" ['Country', 'State', 'Region'], \n",
"]\n",
"\n",
"# getting df\n",
"hier_grouped_df, S_grouped, tags_grouped = hierarchize(df, hiers_grouped)\n",
"hier_strict_df, S_strict, tags_strict = hierarchize(df, hiers_strictly)"
]
},
{
Expand All @@ -162,11 +175,47 @@
"outputs": [],
"source": [
"#hide\n",
"hier_df['y_model'] = hier_df['y']\n",
"hier_grouped_df['y_model'] = hier_grouped_df['y']\n",
"# we should be able to recover y using the methods\n",
"hier_df_h = hier_df.groupby('unique_id').tail(12)\n",
"ds_h = hier_df_h['ds'].unique()\n",
"hier_df = hier_df.query('~(ds in @ds_h)')"
"hier_grouped_df_h = hier_grouped_df.groupby('unique_id').tail(12)\n",
"ds_h = hier_grouped_df_h['ds'].unique()\n",
"hier_grouped_df = hier_grouped_df.query('~(ds in @ds_h)')\n",
"\n",
"#hierachical reconciliation\n",
"hrec = HierarchicalReconciliation(reconcilers=[\n",
" #these methods should reconstruct the original y\n",
" BottomUp(),\n",
" MinTrace(method='ols'),\n",
" MinTrace(method='wls_struct'),\n",
" MinTrace(method='wls_var'),\n",
" MinTrace(method='mint_shrink'),\n",
" # ERM recovers but needs bigger eps\n",
" ERM(method='exact'),\n",
"])\n",
"reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n",
"for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
" if 'ERM' in model:\n",
" eps = 3\n",
" else:\n",
" eps = 1e-5\n",
" test_close(reconciled['y'], reconciled[model], eps=eps)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#hide\n",
"# top down should break\n",
"# with non strictly hierarchical structures\n",
"hrec = HierarchicalReconciliation([TopDown(method='average_proportions')])\n",
"test_fail(\n",
" hrec.reconcile,\n",
" contains='requires strictly hierarchical structures',\n",
" args=(hier_grouped_df_h, hier_grouped_df, S_grouped, tags_grouped)\n",
")"
]
},
{
Expand All @@ -176,17 +225,52 @@
"outputs": [],
"source": [
"#hide\n",
"# methods should work with\n",
"# srtictly hierarchical structures\n",
"#hide\n",
"hier_strict_df['y_model'] = hier_strict_df['y']\n",
"# we should be able to recover y using the methods\n",
"hier_strict_df_h = hier_strict_df.groupby('unique_id').tail(12)\n",
"ds_h = hier_strict_df_h['ds'].unique()\n",
"hier_strict_df = hier_strict_df.query('~(ds in @ds_h)')\n",
"\n",
"# hierarchical reconciliation\n",
"hrec = HierarchicalReconciliation(reconcilers=[\n",
" #these methods should reconstruct the original y\n",
" BottomUp(),\n",
" MinTrace(method='ols'),\n",
" MinTrace(method='wls_struct'),\n",
" MinTrace(method='wls_var'),\n",
" MinTrace(method='mint_shrink')\n",
" MinTrace(method='mint_shrink'),\n",
" # top down doesnt recover the original y\n",
" # but it should recover the total level\n",
" TopDown(method='forecast_proportions'),\n",
" TopDown(method='average_proportions'),\n",
" TopDown(method='proportion_averages'),\n",
" # ERM recovers but needs bigger eps\n",
" ERM(method='exact'),\n",
"])\n",
"reconciled = hrec.reconcile(hier_df_h, hier_df, S)\n",
"reconciled = hrec.reconcile(hier_strict_df_h, hier_strict_df, S_strict, tags_strict)\n",
"for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
" test_close(reconciled['y'], reconciled[model])"
" if 'ERM' in model:\n",
" eps = 3\n",
" else:\n",
" eps = 1e-5\n",
" if 'TopDown' in model:\n",
" if 'forecast_proportions' in model:\n",
" test_close(reconciled['y'], reconciled[model], eps)\n",
" else:\n",
" # top down doesnt recover the original y\n",
" test_fail(\n",
" test_close,\n",
" args=(reconciled['y'], reconciled[model], eps),\n",
" )\n",
" # but it should recover the total level\n",
" total_tag = tags_strict['Country']\n",
" test_close(reconciled['y'].loc[total_tag], \n",
" reconciled[model].loc[total_tag], 1e-2)\n",
" else:\n",
" test_close(reconciled['y'], reconciled[model], eps)"
]
},
{
Expand All @@ -200,7 +284,7 @@
"#even if their signature includes\n",
"#that argument\n",
"hrec = HierarchicalReconciliation([MinTrace(method='ols')])\n",
"reconciled = hrec.reconcile(hier_df_h, hier_df.drop(columns=['y_model']), S)\n",
"reconciled = hrec.reconcile(hier_grouped_df_h, hier_grouped_df.drop(columns=['y_model']), S_grouped, tags_grouped)\n",
"for model in reconciled.drop(columns=['ds', 'y']).columns:\n",
" test_close(reconciled['y'], reconciled[model])"
]
Expand All @@ -212,7 +296,7 @@
"outputs": [],
"source": [
"#hide\n",
"reconciled.loc[tags['Country/State']]"
"reconciled.loc[tags_grouped['Country/State']]"
]
},
{
Expand Down Expand Up @@ -292,7 +376,7 @@
"evaluator = HierarchicalEvaluation([mse, rmse])\n",
"evaluator.evaluate(Y_h=reconciled.drop(columns='y'), \n",
" Y_test=reconciled[['ds', 'y']], \n",
" tags=tags,\n",
" tags=tags_grouped,\n",
" benchmark='y_model')"
]
}
Expand Down
Loading