Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEAT] Probabilistic Forecasting Util Functions #195

Merged
merged 4 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion hierarchicalforecast/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.HierarchicalPlot.plot_summing_matrix': ( 'utils.html#hierarchicalplot.plot_summing_matrix',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils._to_quantiles_df': ( 'utils.html#_to_quantiles_df',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils._to_summing_dataframe': ( 'utils.html#_to_summing_dataframe',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils._to_summing_matrix': ( 'utils.html#_to_summing_matrix',
Expand All @@ -181,5 +183,9 @@
'hierarchicalforecast.utils.cov2corr': ('utils.html#cov2corr', 'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.is_strictly_hierarchical': ( 'utils.html#is_strictly_hierarchical',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.level_to_outputs': ( 'utils.html#level_to_outputs',
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.numpy_balance': ( 'utils.html#numpy_balance',
'hierarchicalforecast/utils.py')}}}
'hierarchicalforecast/utils.py'),
'hierarchicalforecast.utils.quantiles_to_outputs': ( 'utils.html#quantiles_to_outputs',
'hierarchicalforecast/utils.py')}}}
74 changes: 71 additions & 3 deletions hierarchicalforecast/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,75 @@ def cov2corr(cov, return_std=False):
else:
return corr

# %% ../nbs/utils.ipynb 7
# convert levels to output quantile names
def level_to_outputs(level):
    """Convert prediction-interval levels into quantiles and column suffixes.

    Each level ``l`` (in percent, e.g. ``80``) contributes the two quantiles
    ``(50 - l/2) / 100`` and ``(50 + l/2) / 100`` with the suffixes
    ``'-lo-{l}'`` and ``'-hi-{l}'``. The median is always placed first,
    followed by the remaining quantiles in ascending order.

    Parameters
    ----------
    level : iterable of numbers
        Prediction-interval levels expressed in percent.

    Returns
    -------
    quantiles : numpy.ndarray
        Float64 array of quantiles in ``[0, 1]``; element 0 is ``0.5``.
    output_names : list of str
        Suffixes aligned with ``quantiles``; element 0 is ``'-median'``.
    """
    # Pair every percentile with its suffix so a single sort keeps them aligned.
    pairs = []
    for lv in level:
        pairs.append((50 - lv / 2, f'-lo-{lv}'))
        pairs.append((50 + lv / 2, f'-hi-{lv}'))

    # list.sort is stable, so tied percentiles keep their insertion order.
    pairs.sort(key=lambda pair: pair[0])

    # Add default median, then rescale percentiles to quantiles. Plain numpy
    # is used here: it keeps float64 precision and avoids depending on torch.
    quantiles = np.array([50.0] + [pct for pct, _ in pairs]) / 100
    output_names = ['-median'] + [name for _, name in pairs]

    return quantiles, output_names

# convert quantiles to output quantile names
def quantiles_to_outputs(quantiles):
    """Derive a column suffix for every requested quantile.

    Quantiles below 0.5 map to ``'-lo-<level>'``, quantiles above 0.5 map to
    ``'-hi-<level>'`` and anything else maps to ``'-median'``, where
    ``<level>`` is the matching interval level in percent rounded to two
    decimals.

    Parameters
    ----------
    quantiles : iterable of float
        Quantiles in ``[0, 1]``.

    Returns
    -------
    quantiles
        The input object, returned unchanged.
    output_names : list of str
        One suffix per quantile, in input order.
    """
    def _suffix(q):
        # Lower tail: quantile q belongs to the (100 - 200*q)% interval.
        if q < .50:
            return f'-lo-{np.round(100-200*q,2)}'
        # Upper tail: mirror the quantile around the median first.
        if q > .50:
            return f'-hi-{np.round(100-200*(1-q),2)}'
        return '-median'

    return quantiles, [_suffix(q) for q in quantiles]

# %% ../nbs/utils.ipynb 8
# given an input array of sample forecasts and input quantiles/levels,
# output a pandas DataFrame with columns of quantile predictions
def _to_quantiles_df(samples,
                     unique_ids,
                     dates,
                     quantiles = None,
                     level = None,
                     model_name = "model"):
    """Summarise sample forecasts as a DataFrame of mean and quantile columns.

    Parameters
    ----------
    samples : array-like with three axes
        Sample forecasts. Axis 0 must match ``len(unique_ids)``, axis 1 is
        the sample axis that gets reduced, axis 2 must match ``len(dates)``.
    unique_ids : sequence
        One identifier per series.
    dates : sequence
        One timestamp per forecast step.
    quantiles : sequence of float, optional
        Quantiles in ``[0, 1]``. Exactly one of ``quantiles`` / ``level``
        must be given.
    level : sequence of numbers, optional
        Prediction-interval levels in percent.
    model_name : str
        Name of the mean column and prefix of every quantile column.

    Returns
    -------
    quantiles
        The quantiles as returned by ``level_to_outputs`` or
        ``quantiles_to_outputs``.
    pandas.DataFrame
        Indexed by ``unique_id`` with columns ``ds``, ``model_name`` (the
        sample mean) and one column per quantile. Rows are ordered series by
        series, each series spanning all dates.

    Raises
    ------
    AssertionError
        If the shapes disagree, or if both or neither of ``quantiles`` and
        ``level`` are provided.
    """
    # Get the shape of the array
    n_series, _, horizon = samples.shape

    assert n_series == len(unique_ids)
    assert horizon == len(dates)
    # check exactly one of quantiles/levels has been input
    assert (quantiles is not None) ^ (level is not None)

    # create initial frame: one row per (series, date) pair
    forecasts_mean = np.mean(samples, axis=1).flatten()
    unique_ids = np.repeat(unique_ids, horizon)
    ds = np.tile(dates, n_series)
    data = pd.DataFrame({"unique_id":unique_ids, "ds":ds, model_name:forecasts_mean})

    # create quantiles and quantile names
    if level is not None:
        quantiles, quantile_names = level_to_outputs(level)
    else:
        quantiles, quantile_names = quantiles_to_outputs(quantiles)
    # Convert explicitly: multiplying a plain list by 100 would repeat it
    # instead of scaling its values.
    percentiles = np.asarray(quantiles, dtype=float) * 100
    col_names = [model_name + quantile_name for quantile_name in quantile_names]

    # add quantiles to dataframe
    forecasts_quantiles = np.percentile(samples, percentiles, axis=1)

    forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,N,H] -> [N,H,Q]
    forecasts_quantiles = forecasts_quantiles.reshape(-1, len(percentiles))

    df = pd.DataFrame(data=forecasts_quantiles,
                      columns=col_names)

    return quantiles, pd.concat([data,df], axis=1).set_index('unique_id')

# %% ../nbs/utils.ipynb 10
def _to_summing_matrix(S_df: pd.DataFrame):
"""Transforms the DataFrame `df` of hierarchies to a summing matrix S."""
categories = [S_df[col].unique() for col in S_df.columns]
Expand All @@ -81,7 +149,7 @@ def _to_summing_matrix(S_df: pd.DataFrame):
tags = dict(zip(S_df.columns, categories))
return S, tags

# %% ../nbs/utils.ipynb 9
# %% ../nbs/utils.ipynb 11
def aggregate_before(df: pd.DataFrame,
spec: List[List[str]],
agg_fn: Callable = np.sum):
Expand Down Expand Up @@ -123,7 +191,7 @@ def aggregate_before(df: pd.DataFrame,
S, tags = _to_summing_matrix(S_df.loc[bottom_hier, hiers_cols])
return Y_df, S, tags

# %% ../nbs/utils.ipynb 10
# %% ../nbs/utils.ipynb 12
def numpy_balance(*arrs):
"""
Fast NumPy implementation of balance function.
Expand Down Expand Up @@ -248,7 +316,7 @@ def aggregate(df: pd.DataFrame,
Y_df = Y_df.set_index('unique_id')
return Y_df, S_df, tags

# %% ../nbs/utils.ipynb 16
# %% ../nbs/utils.ipynb 18
class HierarchicalPlot:
""" Hierarchical Plot

Expand Down
86 changes: 86 additions & 0 deletions nbs/utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,92 @@
" return corr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0665290c",
"metadata": {},
"outputs": [],
"source": [
"#| exporti\n",
"\n",
"# convert levels to output quantile names\n",
"def level_to_outputs(level):\n",
" qs = sum([[50-l/2, 50+l/2] for l in level], [])\n",
" output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], [])\n",
"\n",
" sort_idx = np.argsort(qs)\n",
" quantiles = np.array(qs)[sort_idx]\n",
"\n",
" # Add default median\n",
" quantiles = np.concatenate([np.array([50]), quantiles])\n",
" quantiles = torch.Tensor(quantiles) / 100\n",
" output_names = list(np.array(output_names)[sort_idx])\n",
" output_names.insert(0, '-median')\n",
" \n",
" return quantiles, output_names\n",
"\n",
"# convert quantiles to output quantile names\n",
"def quantiles_to_outputs(quantiles):\n",
" output_names = []\n",
" for q in quantiles:\n",
" if q<.50:\n",
" output_names.append(f'-lo-{np.round(100-200*q,2)}')\n",
" elif q>.50:\n",
" output_names.append(f'-hi-{np.round(100-200*(1-q),2)}')\n",
" else:\n",
" output_names.append('-median')\n",
" return quantiles, output_names"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4ffbe55",
"metadata": {},
"outputs": [],
"source": [
"#| exporti\n",
"\n",
"# given input array of sample forecasts and inptut quantiles/levels, \n",
"# output a Pandas Dataframe with columns of quantile predictions\n",
"def _to_quantiles_df(samples, \n",
" unique_ids, \n",
" dates, \n",
" quantiles = None,\n",
" level = None, \n",
" model_name = \"model\"):\n",
" \n",
" # Get the shape of the array\n",
" N, S, H = samples.shape\n",
"\n",
" assert N == len(unique_ids)\n",
" assert H == len(dates)\n",
" assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input\n",
"\n",
" #create initial dictionary\n",
" forecasts_mean = np.mean(forecasts, axis=1).flatten()\n",
" unique_ids = np.repeat(unique_ids, H)\n",
" ds = np.tile(dates, N)\n",
" data = pd.DataFrame({\"unique_id\":unique_ids, \"ds\":ds, model_name:forecasts_mean})\n",
"\n",
" #create quantiles and quantile names\n",
" quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles)\n",
" percentiles = quantiles * 100\n",
" col_names = np.array([model_name + quantile_name for quantile_name in quantile_names])\n",
" \n",
" #add quantiles to dataframe\n",
" forecasts_quantiles = np.percentile(forecasts, percentiles, axis=1)\n",
"\n",
" forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q]\n",
" forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles))\n",
"\n",
" df = pd.DataFrame(data=forecasts_quantiles, \n",
" columns=col_names)\n",
" \n",
" return quantiles, pd.concat([data,df], axis=1).set_index('unique_id')"
]
},
{
"cell_type": "markdown",
"id": "3a1f4267",
Expand Down