From 5f733d9b14de89185a1a448df03e0a3caea7c29c Mon Sep 17 00:00:00 2001 From: David Luo Date: Fri, 2 Jun 2023 09:08:29 -0700 Subject: [PATCH 1/4] added probabilistic forecasting util functions --- hierarchicalforecast/_modidx.py | 8 ++- hierarchicalforecast/utils.py | 74 ++++++++++++++++++++++++++-- nbs/utils.ipynb | 86 +++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 4 deletions(-) diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py index e51d790a..d415cca0 100644 --- a/hierarchicalforecast/_modidx.py +++ b/hierarchicalforecast/_modidx.py @@ -170,6 +170,8 @@ 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.HierarchicalPlot.plot_summing_matrix': ( 'utils.html#hierarchicalplot.plot_summing_matrix', 'hierarchicalforecast/utils.py'), + 'hierarchicalforecast.utils._to_quantiles_df': ( 'utils.html#_to_quantiles_df', + 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils._to_summing_dataframe': ( 'utils.html#_to_summing_dataframe', 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils._to_summing_matrix': ( 'utils.html#_to_summing_matrix', @@ -181,5 +183,9 @@ 'hierarchicalforecast.utils.cov2corr': ('utils.html#cov2corr', 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.is_strictly_hierarchical': ( 'utils.html#is_strictly_hierarchical', 'hierarchicalforecast/utils.py'), + 'hierarchicalforecast.utils.level_to_outputs': ( 'utils.html#level_to_outputs', + 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.numpy_balance': ( 'utils.html#numpy_balance', - 'hierarchicalforecast/utils.py')}}} + 'hierarchicalforecast/utils.py'), + 'hierarchicalforecast.utils.quantiles_to_outputs': ( 'utils.html#quantiles_to_outputs', + 'hierarchicalforecast/utils.py')}}} diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index 8883739c..d3ddc371 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -68,7 +68,75 @@ def cov2corr(cov, return_std=False): else: return corr +# %% ../nbs/utils.ipynb 7 +# convert levels to output quantile names +def level_to_outputs(level): + qs = sum([[50-l/2, 50+l/2] for l in level], []) + output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], []) + + sort_idx = np.argsort(qs) + quantiles = np.array(qs)[sort_idx] + + # Add default median + quantiles = np.concatenate([np.array([50]), quantiles]) + quantiles = torch.Tensor(quantiles) / 100 + output_names = list(np.array(output_names)[sort_idx]) + output_names.insert(0, '-median') + + return quantiles, output_names + +# convert quantiles to output quantile names +def quantiles_to_outputs(quantiles): + output_names = [] + for q in quantiles: + if q<.50: + output_names.append(f'-lo-{np.round(100-200*q,2)}') + elif q>.50: + output_names.append(f'-hi-{np.round(100-200*(1-q),2)}') + else: + output_names.append('-median') + return quantiles, output_names + # %% ../nbs/utils.ipynb 8 +# given input array of sample forecasts and inptut quantiles/levels, +# output a Pandas Dataframe with columns of quantile predictions +def _to_quantiles_df(samples, + unique_ids, + dates, + quantiles = None, + level = None, + model_name = "model"): + + # Get the shape of the array + N, S, H = samples.shape + + assert N == len(unique_ids) + assert H == len(dates) + assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input + + #create initial dictionary + forecasts_mean = np.mean(forecasts, axis=1).flatten() + unique_ids = np.repeat(unique_ids, H) + ds = np.tile(dates, N) + data = pd.DataFrame({"unique_id":unique_ids, "ds":ds, model_name:forecasts_mean}) + + #create quantiles and quantile names + quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles) + percentiles = quantiles * 100 + col_names = np.array([model_name + quantile_name for quantile_name in quantile_names]) + + #add quantiles to dataframe + forecasts_quantiles = np.percentile(forecasts, percentiles, axis=1) + + forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q] + forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles)) + + df = pd.DataFrame(data=forecasts_quantiles, + columns=col_names) + + return quantiles, pd.concat([data,df], axis=1).set_index('unique_id') + +# %% ../nbs/utils.ipynb 10 def _to_summing_matrix(S_df: pd.DataFrame): """Transforms the DataFrame `df` of hierarchies to a summing matrix S.""" categories = [S_df[col].unique() for col in S_df.columns] @@ -81,7 +149,7 @@ def _to_summing_matrix(S_df: pd.DataFrame): tags = dict(zip(S_df.columns, categories)) return S, tags -# %% ../nbs/utils.ipynb 9 +# %% ../nbs/utils.ipynb 11 def aggregate_before(df: pd.DataFrame, spec: List[List[str]], agg_fn: Callable = np.sum): @@ -123,7 +191,7 @@ def aggregate_before(df: pd.DataFrame, S, tags = _to_summing_matrix(S_df.loc[bottom_hier, hiers_cols]) return Y_df, S, tags -# %% ../nbs/utils.ipynb 10 +# %% ../nbs/utils.ipynb 12 def numpy_balance(*arrs): """ Fast NumPy implementation of balance function. @@ -248,7 +316,7 @@ def aggregate(df: pd.DataFrame, Y_df = Y_df.set_index('unique_id') return Y_df, S_df, tags -# %% ../nbs/utils.ipynb 16 +# %% ../nbs/utils.ipynb 18 class HierarchicalPlot: """ Hierarchical Plot diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index ac4dbe37..634934e7 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -132,6 +132,92 @@ " return corr" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "0665290c", + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "# convert levels to output quantile names\n", + "def level_to_outputs(level):\n", + " qs = sum([[50-l/2, 50+l/2] for l in level], [])\n", + " output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], [])\n", + "\n", + " sort_idx = np.argsort(qs)\n", + " quantiles = np.array(qs)[sort_idx]\n", + "\n", + " # Add default median\n", + " quantiles = np.concatenate([np.array([50]), quantiles])\n", + " quantiles = torch.Tensor(quantiles) / 100\n", + " output_names = list(np.array(output_names)[sort_idx])\n", + " output_names.insert(0, '-median')\n", + " \n", + " return quantiles, output_names\n", + "\n", + "# convert quantiles to output quantile names\n", + "def quantiles_to_outputs(quantiles):\n", + " output_names = []\n", + " for q in quantiles:\n", + " if q<.50:\n", + " output_names.append(f'-lo-{np.round(100-200*q,2)}')\n", + " elif q>.50:\n", + " output_names.append(f'-hi-{np.round(100-200*(1-q),2)}')\n", + " else:\n", + " output_names.append('-median')\n", + " return quantiles, output_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4ffbe55", + "metadata": {}, + "outputs": [], + "source": [ + "#| exporti\n", + "\n", + "# given input array of sample forecasts and inptut quantiles/levels, \n", + "# output a Pandas Dataframe with columns of quantile predictions\n", + "def _to_quantiles_df(samples, \n", + " unique_ids, \n", + " dates, \n", + " quantiles = None,\n", + " level = None, \n", + " model_name = \"model\"):\n", + " \n", + " # Get the shape of the array\n", + " N, S, H = samples.shape\n", + "\n", + " assert N == len(unique_ids)\n", + " assert H == len(dates)\n", + " assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input\n", + "\n", + " #create initial dictionary\n", + " forecasts_mean = np.mean(forecasts, axis=1).flatten()\n", + " unique_ids = np.repeat(unique_ids, H)\n", + " ds = np.tile(dates, N)\n", + " data = pd.DataFrame({\"unique_id\":unique_ids, \"ds\":ds, model_name:forecasts_mean})\n", + "\n", + " #create quantiles and quantile names\n", + " quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles)\n", + " percentiles = quantiles * 100\n", + " col_names = np.array([model_name + quantile_name for quantile_name in quantile_names])\n", + " \n", + " #add quantiles to dataframe\n", + " forecasts_quantiles = np.percentile(forecasts, percentiles, axis=1)\n", + "\n", + " forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q]\n", + " forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles))\n", + "\n", + " df = pd.DataFrame(data=forecasts_quantiles, \n", + " columns=col_names)\n", + " \n", + " return quantiles, pd.concat([data,df], axis=1).set_index('unique_id')" + ] + }, { "cell_type": "markdown", "id": "3a1f4267", From d81dcbdab81eb857496a0d363939f9a084dd2fbc Mon Sep 17 00:00:00 2001 From: David Luo Date: Fri, 2 Jun 2023 18:03:51 -0700 Subject: [PATCH 2/4] added documentation --- hierarchicalforecast/utils.py | 41 ++++++++++++++++++++++++++++++----- nbs/utils.ipynb | 41 ++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 10 deletions(-) diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index d3ddc371..f943c69f 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -71,6 +71,14 @@ def cov2corr(cov, return_std=False): # %% ../nbs/utils.ipynb 7 # convert levels to output quantile names def level_to_outputs(level): + """ Converts list of levels into output names matching StatsForecast and NeuralForecast methods. + + **Parameters:**
+ `level`: int list [0,100]. Probability levels for prediction intervals.
+ + **Returns:**
+ `output_names`: str list. String list with output column names. + """ qs = sum([[50-l/2, 50+l/2] for l in level], []) output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], []) @@ -87,6 +95,14 @@ def level_to_outputs(level): # convert quantiles to output quantile names def quantiles_to_outputs(quantiles): + """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods. + + **Parameters:**
+ `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
+ + **Returns:**
+ `output_names`: str list. String list with output column names. + """ output_names = [] for q in quantiles: if q<.50: @@ -106,18 +122,33 @@ def _to_quantiles_df(samples, quantiles = None, level = None, model_name = "model"): + """ Transform Samples into HierarchicalForecast input. + Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe. + + **Parameters:**
+ `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
+ `unique_ids`: string list. Unique identifiers for each time series.
+ `dates`: datetime list. List of forecast dates.
+ `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
+ `level`: int list [0,100]. Probability levels for prediction intervals.
+ `model_name`: string. Name of forecasting model.
+ + **Returns:**
+ `quantiles`: float list [0., 1.]. quantiles to estimate from y distribution .
+ `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id. + """ # Get the shape of the array - N, S, H = samples.shape + n_series, n_samples, horizon = samples.shape - assert N == len(unique_ids) - assert H == len(dates) + assert n_series == len(unique_ids) + assert horizon == len(dates) assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input #create initial dictionary forecasts_mean = np.mean(forecasts, axis=1).flatten() - unique_ids = np.repeat(unique_ids, H) - ds = np.tile(dates, N) + unique_ids = np.repeat(unique_ids, horizon) + ds = np.tile(dates, n_series) data = pd.DataFrame({"unique_id":unique_ids, "ds":ds, model_name:forecasts_mean}) #create quantiles and quantile names diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index 634934e7..c40b5c9f 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -143,6 +143,14 @@ "\n", "# convert levels to output quantile names\n", "def level_to_outputs(level):\n", + " \"\"\" Converts list of levels into output names matching StatsForecast and NeuralForecast methods.\n", + "\n", + " **Parameters:**
\n", + " `level`: int list [0,100]. Probability levels for prediction intervals.
\n", + "\n", + " **Returns:**
\n", + " `output_names`: str list. String list with output column names.\n", + " \"\"\"\n", " qs = sum([[50-l/2, 50+l/2] for l in level], [])\n", " output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], [])\n", "\n", @@ -159,6 +167,14 @@ "\n", "# convert quantiles to output quantile names\n", "def quantiles_to_outputs(quantiles):\n", + " \"\"\"Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.\n", + "\n", + " **Parameters:**
\n", + " `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
\n", + "\n", + " **Returns:**
\n", + " `output_names`: str list. String list with output column names.\n", + " \"\"\"\n", " output_names = []\n", " for q in quantiles:\n", " if q<.50:\n", @@ -187,18 +203,33 @@ " quantiles = None,\n", " level = None, \n", " model_name = \"model\"):\n", + " \"\"\" Transform Samples into HierarchicalForecast input.\n", + " Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe.\n", + "\n", + " **Parameters:**
\n", + " `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
\n", + " `unique_ids`: string list. Unique identifiers for each time series.
\n", + " `dates`: datetime list. List of forecast dates.
\n", + " `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
\n", + " `level`: int list [0,100]. Probability levels for prediction intervals.
\n", + " `model_name`: string. Name of forecasting model.
\n", + "\n", + " **Returns:**
\n", + " `quantiles`: float list [0., 1.]. quantiles to estimate from y distribution .
\n", + " `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.\n", + " \"\"\"\n", " \n", " # Get the shape of the array\n", - " N, S, H = samples.shape\n", + " n_series, n_samples, horizon = samples.shape\n", "\n", - " assert N == len(unique_ids)\n", - " assert H == len(dates)\n", + " assert n_series == len(unique_ids)\n", + " assert horizon == len(dates)\n", " assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input\n", "\n", " #create initial dictionary\n", " forecasts_mean = np.mean(forecasts, axis=1).flatten()\n", - " unique_ids = np.repeat(unique_ids, H)\n", - " ds = np.tile(dates, N)\n", + " unique_ids = np.repeat(unique_ids, horizon)\n", + " ds = np.tile(dates, n_series)\n", " data = pd.DataFrame({\"unique_id\":unique_ids, \"ds\":ds, model_name:forecasts_mean})\n", "\n", " #create quantiles and quantile names\n", From f1e3de9951129c617b021ce3274b3d288efbcd71 Mon Sep 17 00:00:00 2001 From: David Luo Date: Fri, 2 Jun 2023 22:25:57 -0700 Subject: [PATCH 3/4] improved documentaion and changed util function name --- hierarchicalforecast/_modidx.py | 6 +++--- hierarchicalforecast/utils.py | 30 +++++++++++++++--------------- nbs/utils.ipynb | 32 +++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py index d415cca0..771e7ce0 100644 --- a/hierarchicalforecast/_modidx.py +++ b/hierarchicalforecast/_modidx.py @@ -170,8 +170,6 @@ 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.HierarchicalPlot.plot_summing_matrix': ( 'utils.html#hierarchicalplot.plot_summing_matrix', 'hierarchicalforecast/utils.py'), - 'hierarchicalforecast.utils._to_quantiles_df': ( 'utils.html#_to_quantiles_df', - 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils._to_summing_dataframe': ( 'utils.html#_to_summing_dataframe', 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils._to_summing_matrix': ( 'utils.html#_to_summing_matrix', @@ -188,4 +186,6 @@ 'hierarchicalforecast.utils.numpy_balance': ( 'utils.html#numpy_balance', 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.quantiles_to_outputs': ( 'utils.html#quantiles_to_outputs', - 'hierarchicalforecast/utils.py')}}} + 'hierarchicalforecast/utils.py'), + 'hierarchicalforecast.utils.samples_to_quantiles_df': ( 'utils.html#samples_to_quantiles_df', + 'hierarchicalforecast/utils.py')}}} diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index f943c69f..6a8a4ea9 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -7,7 +7,7 @@ import sys import timeit from itertools import chain -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, List, Optional, Sequence import matplotlib.pyplot as plt import numpy as np @@ -116,12 +116,12 @@ def quantiles_to_outputs(quantiles): # %% ../nbs/utils.ipynb 8 # given input array of sample forecasts and inptut quantiles/levels, # output a Pandas Dataframe with columns of quantile predictions -def _to_quantiles_df(samples, - unique_ids, - dates, - quantiles = None, - level = None, - model_name = "model"): +def samples_to_quantiles_df(samples:np.ndarray, + unique_ids:Sequence[str], + dates:Sequence, + quantiles:Optional[Sequence[float]] = None, + level:Optional[Sequence[int]] = None, + model_name:Optional[str] = "model"): """ Transform Samples into HierarchicalForecast input. Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe. @@ -129,12 +129,12 @@ def _to_quantiles_df(samples, `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
`unique_ids`: string list. Unique identifiers for each time series.
`dates`: datetime list. List of forecast dates.
- `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
- `level`: int list [0,100]. Probability levels for prediction intervals.
+ `quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
+ `level`: int list in [0,100]. Probability levels for prediction intervals.
`model_name`: string. Name of forecasting model.
**Returns:**
- `quantiles`: float list [0., 1.]. quantiles to estimate from y distribution .
+ `quantiles`: float list in [0., 1.]. quantiles to estimate from y distribution .
`Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id. """ @@ -167,7 +167,7 @@ def _to_quantiles_df(samples, return quantiles, pd.concat([data,df], axis=1).set_index('unique_id') -# %% ../nbs/utils.ipynb 10 +# %% ../nbs/utils.ipynb 11 def _to_summing_matrix(S_df: pd.DataFrame): """Transforms the DataFrame `df` of hierarchies to a summing matrix S.""" categories = [S_df[col].unique() for col in S_df.columns] @@ -180,14 +180,14 @@ def _to_summing_matrix(S_df: pd.DataFrame): tags = dict(zip(S_df.columns, categories)) return S, tags -# %% ../nbs/utils.ipynb 11 +# %% ../nbs/utils.ipynb 12 def aggregate_before(df: pd.DataFrame, spec: List[List[str]], agg_fn: Callable = np.sum): """Utils Aggregation Function. Aggregates bottom level series contained in the pd.DataFrame `df` according - to levels defined in the `spec` list applying the `agg_fn` (sum, mean). + to levels defined in the `spec` list applying the `agg_fn` (sum, mean).
**Parameters:**
`df`: pd.DataFrame with columns `['ds', 'y']` and columns to aggregate.
@@ -222,7 +222,7 @@ def aggregate_before(df: pd.DataFrame, S, tags = _to_summing_matrix(S_df.loc[bottom_hier, hiers_cols]) return Y_df, S, tags -# %% ../nbs/utils.ipynb 12 +# %% ../nbs/utils.ipynb 13 def numpy_balance(*arrs): """ Fast NumPy implementation of balance function. @@ -347,7 +347,7 @@ def aggregate(df: pd.DataFrame, Y_df = Y_df.set_index('unique_id') return Y_df, S_df, tags -# %% ../nbs/utils.ipynb 18 +# %% ../nbs/utils.ipynb 19 class HierarchicalPlot: """ Hierarchical Plot diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index c40b5c9f..e6ac4651 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -34,7 +34,7 @@ "import sys\n", "import timeit\n", "from itertools import chain\n", - "from typing import Callable, Dict, List, Optional\n", + "from typing import Callable, Dict, List, Optional, Sequence\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -197,12 +197,12 @@ "\n", "# given input array of sample forecasts and inptut quantiles/levels, \n", "# output a Pandas Dataframe with columns of quantile predictions\n", - "def _to_quantiles_df(samples, \n", - " unique_ids, \n", - " dates, \n", - " quantiles = None,\n", - " level = None, \n", - " model_name = \"model\"):\n", + "def samples_to_quantiles_df(samples:np.ndarray, \n", + " unique_ids:Sequence[str], \n", + " dates:Sequence, \n", + " quantiles:Optional[Sequence[float]] = None,\n", + " level:Optional[Sequence[int]] = None, \n", + " model_name:Optional[str] = \"model\"):\n", " \"\"\" Transform Samples into HierarchicalForecast input.\n", " Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe.\n", "\n", @@ -210,12 +210,12 @@ " `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
\n", " `unique_ids`: string list. Unique identifiers for each time series.
\n", " `dates`: datetime list. List of forecast dates.
\n", - " `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
\n", - " `level`: int list [0,100]. Probability levels for prediction intervals.
\n", + " `quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
\n", + " `level`: int list in [0,100]. Probability levels for prediction intervals.
\n", " `model_name`: string. Name of forecasting model.
\n", "\n", " **Returns:**
\n", - " `quantiles`: float list [0., 1.]. quantiles to estimate from y distribution .
\n", + " `quantiles`: float list in [0., 1.]. quantiles to estimate from y distribution .
\n", " `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.\n", " \"\"\"\n", " \n", @@ -249,6 +249,16 @@ " return quantiles, pd.concat([data,df], axis=1).set_index('unique_id')" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "940693d0", + "metadata": {}, + "outputs": [], + "source": [ + "show_doc(samples_to_quantiles_df, title_level=3)" + ] + }, { "cell_type": "markdown", "id": "3a1f4267", @@ -292,7 +302,7 @@ " \"\"\"Utils Aggregation Function.\n", "\n", " Aggregates bottom level series contained in the pd.DataFrame `df` according \n", - " to levels defined in the `spec` list applying the `agg_fn` (sum, mean).\n", + " to levels defined in the `spec` list applying the `agg_fn` (sum, mean).
\n", "\n", " **Parameters:**
\n", " `df`: pd.DataFrame with columns `['ds', 'y']` and columns to aggregate.
\n", From 028770d7c3e891d3d281877346d5ac4e1ee8f400 Mon Sep 17 00:00:00 2001 From: David Luo Date: Mon, 5 Jun 2023 23:03:36 -0700 Subject: [PATCH 4/4] added unit tests --- hierarchicalforecast/utils.py | 25 +++++---- nbs/utils.ipynb | 97 ++++++++++++++++++++++++++++++----- 2 files changed, 97 insertions(+), 25 deletions(-) diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index 6a8a4ea9..f5913641 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -7,7 +7,7 @@ import sys import timeit from itertools import chain -from typing import Callable, Dict, List, Optional, Sequence +from typing import Callable, Dict, List, Optional, Iterable import matplotlib.pyplot as plt import numpy as np @@ -70,7 +70,7 @@ def cov2corr(cov, return_std=False): # %% ../nbs/utils.ipynb 7 # convert levels to output quantile names -def level_to_outputs(level): +def level_to_outputs(level:Iterable[int]): """ Converts list of levels into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
@@ -86,15 +86,14 @@ def level_to_outputs(level): quantiles = np.array(qs)[sort_idx] # Add default median - quantiles = np.concatenate([np.array([50]), quantiles]) - quantiles = torch.Tensor(quantiles) / 100 + quantiles = np.concatenate([np.array([50]), quantiles]) / 100 output_names = list(np.array(output_names)[sort_idx]) output_names.insert(0, '-median') return quantiles, output_names # convert quantiles to output quantile names -def quantiles_to_outputs(quantiles): +def quantiles_to_outputs(quantiles:Iterable[float]): """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
@@ -117,10 +116,10 @@ def quantiles_to_outputs(quantiles): # given input array of sample forecasts and inptut quantiles/levels, # output a Pandas Dataframe with columns of quantile predictions def samples_to_quantiles_df(samples:np.ndarray, - unique_ids:Sequence[str], - dates:Sequence, - quantiles:Optional[Sequence[float]] = None, - level:Optional[Sequence[int]] = None, + unique_ids:Iterable[str], + dates:Iterable, + quantiles:Optional[Iterable[float]] = None, + level:Optional[Iterable[int]] = None, model_name:Optional[str] = "model"): """ Transform Samples into HierarchicalForecast input. Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe. @@ -146,18 +145,18 @@ def samples_to_quantiles_df(samples:np.ndarray, assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input #create initial dictionary - forecasts_mean = np.mean(forecasts, axis=1).flatten() + forecasts_mean = np.mean(samples, axis=1).flatten() unique_ids = np.repeat(unique_ids, horizon) ds = np.tile(dates, n_series) data = pd.DataFrame({"unique_id":unique_ids, "ds":ds, model_name:forecasts_mean}) #create quantiles and quantile names quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles) - percentiles = quantiles * 100 + percentiles = [quantile * 100 for quantile in quantiles] col_names = np.array([model_name + quantile_name for quantile_name in quantile_names]) #add quantiles to dataframe - forecasts_quantiles = np.percentile(forecasts, percentiles, axis=1) + forecasts_quantiles = np.percentile(samples, percentiles, axis=1) forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q] forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles)) @@ -347,7 +346,7 @@ def aggregate(df: pd.DataFrame, Y_df = Y_df.set_index('unique_id') return Y_df, S_df, tags -# %% ../nbs/utils.ipynb 19 +# %% ../nbs/utils.ipynb 21 class HierarchicalPlot: """ Hierarchical Plot diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index e6ac4651..b0d58a50 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -34,7 +34,7 @@ "import sys\n", "import timeit\n", "from itertools import chain\n", - "from typing import Callable, Dict, List, Optional, Sequence\n", + "from typing import Callable, Dict, List, Optional, Iterable\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -142,7 +142,7 @@ "#| exporti\n", "\n", "# convert levels to output quantile names\n", - "def level_to_outputs(level):\n", + "def level_to_outputs(level:Iterable[int]):\n", " \"\"\" Converts list of levels into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -158,15 +158,14 @@ " quantiles = np.array(qs)[sort_idx]\n", "\n", " # Add default median\n", - " quantiles = np.concatenate([np.array([50]), quantiles])\n", - " quantiles = torch.Tensor(quantiles) / 100\n", + " quantiles = np.concatenate([np.array([50]), quantiles]) / 100\n", " output_names = list(np.array(output_names)[sort_idx])\n", " output_names.insert(0, '-median')\n", " \n", " return quantiles, output_names\n", "\n", "# convert quantiles to output quantile names\n", - "def quantiles_to_outputs(quantiles):\n", + "def quantiles_to_outputs(quantiles:Iterable[float]):\n", " \"\"\"Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -198,10 +197,10 @@ "# given input array of sample forecasts and inptut quantiles/levels, \n", "# output a Pandas Dataframe with columns of quantile predictions\n", "def samples_to_quantiles_df(samples:np.ndarray, \n", - " unique_ids:Sequence[str], \n", - " dates:Sequence, \n", - " quantiles:Optional[Sequence[float]] = None,\n", - " level:Optional[Sequence[int]] = None, \n", + " unique_ids:Iterable[str], \n", + " dates:Iterable, \n", + " quantiles:Optional[Iterable[float]] = None,\n", + " level:Optional[Iterable[int]] = None, \n", " model_name:Optional[str] = \"model\"):\n", " \"\"\" Transform Samples into HierarchicalForecast input.\n", " Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe.\n", @@ -227,18 +226,18 @@ " assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input\n", "\n", " #create initial dictionary\n", - " forecasts_mean = np.mean(forecasts, axis=1).flatten()\n", + " forecasts_mean = np.mean(samples, axis=1).flatten()\n", " unique_ids = np.repeat(unique_ids, horizon)\n", " ds = np.tile(dates, n_series)\n", " data = pd.DataFrame({\"unique_id\":unique_ids, \"ds\":ds, model_name:forecasts_mean})\n", "\n", " #create quantiles and quantile names\n", " quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles)\n", - " percentiles = quantiles * 100\n", + " percentiles = [quantile * 100 for quantile in quantiles]\n", " col_names = np.array([model_name + quantile_name for quantile_name in quantile_names])\n", " \n", " #add quantiles to dataframe\n", - " forecasts_quantiles = np.percentile(forecasts, percentiles, axis=1)\n", + " forecasts_quantiles = np.percentile(samples, percentiles, axis=1)\n", "\n", " forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q]\n", " forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles))\n", @@ -481,6 +480,80 @@ "show_doc(aggregate, title_level=3)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "d15e9834", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "\n", + "#level_to_outputs unit tests\n", + "test_eq(\n", + " level_to_outputs([80, 90]),\n", + " ([0.5 , 0.05, 0.1 , 0.9 , 0.95], ['-median', '-lo-90', '-lo-80', '-hi-80', '-hi-90'])\n", + ")\n", + "test_eq(\n", + " level_to_outputs([30]),\n", + " ([0.5 , 0.35, 0.65], ['-median', '-lo-30', '-hi-30'])\n", + ")\n", + "#quantiles_to_outputs unit tests\n", + "test_eq(\n", + " quantiles_to_outputs([0.2, 0.4, 0.6, 0.8]),\n", + " ([0.2,0.4,0.6, 0.8], ['-lo-60.0', '-lo-20.0', '-hi-20.0', '-hi-60.0'])\n", + ")\n", + "test_eq(\n", + " quantiles_to_outputs([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]),\n", + " ([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], \n", + " ['-lo-80.0', '-lo-60.0', '-lo-40.0', '-lo-20.0', '-median', '-hi-20.0', '-hi-40.0', '-hi-60.0', '-hi-80.0'])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c833f6a", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "\n", + "#samples_to_quantiles_df unit tests\n", + "start_date = pd.Timestamp(\"2023-06-01\")\n", + "end_date = pd.Timestamp(\"2023-06-10\")\n", + "frequency = \"D\" # Daily frequency\n", + "dates = pd.date_range(start=start_date, end=end_date, freq=frequency).tolist()\n", + "samples = np.random.rand(3, 200, 10)\n", + "unique_ids = ['id1', 'id2', 'id3']\n", + "level = np.array([10, 50, 90])\n", + "quantiles=np.array([0.5, 0.05, 0.25, 0.45, 0.55, 0.75, 0.95])\n", + "\n", + "ret_quantiles_1, ret_df_1 = samples_to_quantiles_df(samples, unique_ids, dates, level=level)\n", + "ret_quantiles_2, ret_df_2 = samples_to_quantiles_df(samples, unique_ids, dates, quantiles=quantiles)\n", + "\n", + "test_eq(\n", + " ret_quantiles_1,\n", + " quantiles\n", + ")\n", + "test_eq(\n", + " ret_df_1.columns,\n", + " ['ds', 'model', 'model-median', 'model-lo-90', 'model-lo-50', 'model-lo-10', 'model-hi-10', 'model-hi-50', 'model-hi-90']\n", + ")\n", + "test_eq(\n", + " ret_df_1.index,\n", + " ['id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1',\n", + " 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2',\n", + " 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3']\n", + ")\n", + "test_eq(\n", + " ret_quantiles_1, ret_quantiles_2\n", + ")\n", + "test_eq(\n", + " ret_df_1.index, ret_df_2.index\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null,