time tracking utils (#135)

* Added MSSE, sCRPS, and EScore placeholder
* Default [N,H,samples] shape for easier future handling
* Default [N,H,samples] shape for easier future handling
* Working energy score
* Improved docstrings
* Added Exception messages and QL unit test
* TourismL end to end experiment placeholder
* Partial y_hat_insample protections
* adding tqdm dependency
* adding tqdm to reconcilers for loop
* adding timer utility
Nixtla · Dec 17, 2022 · 8baabe0 · 8baabe0
1 parent 82b53db
commit 8baabe0
Show file tree

Hide file tree

Showing 8 changed files with 124 additions and 20 deletions.
diff --git a/environment.yml b/environment.yml
@@ -11,3 +11,4 @@ dependencies:
   - pip
   - pip:
     - nbdev
+    - tqdm
diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py
@@ -136,7 +136,15 @@
                                                                                                                                          'hierarchicalforecast/probabilistic_methods.py'),
                                                             'hierarchicalforecast.probabilistic_methods.PERMBU.get_samples': ( 'probabilistic_methods.html#permbu.get_samples',
                                                                                                                                'hierarchicalforecast/probabilistic_methods.py')},
-            'hierarchicalforecast.utils': { 'hierarchicalforecast.utils.HierarchicalPlot': ( 'utils.html#hierarchicalplot',
+            'hierarchicalforecast.utils': { 'hierarchicalforecast.utils.CodeTimer': ( 'utils.html#codetimer',
+                                                                                      'hierarchicalforecast/utils.py'),
+                                            'hierarchicalforecast.utils.CodeTimer.__enter__': ( 'utils.html#codetimer.__enter__',
+                                                                                                'hierarchicalforecast/utils.py'),
+                                            'hierarchicalforecast.utils.CodeTimer.__exit__': ( 'utils.html#codetimer.__exit__',
+                                                                                               'hierarchicalforecast/utils.py'),
+                                            'hierarchicalforecast.utils.CodeTimer.__init__': ( 'utils.html#codetimer.__init__',
+                                                                                               'hierarchicalforecast/utils.py'),
+                                            'hierarchicalforecast.utils.HierarchicalPlot': ( 'utils.html#hierarchicalplot',
                                                                                              'hierarchicalforecast/utils.py'),
                                             'hierarchicalforecast.utils.HierarchicalPlot.__init__': ( 'utils.html#hierarchicalplot.__init__',
                                                                                                       'hierarchicalforecast/utils.py'),

diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py
@@ -13,6 +13,7 @@
 
 import numpy as np
 import pandas as pd
+from tqdm import tqdm
 
 # %% ../nbs/core.ipynb 5
 def _build_fn_name(fn) -> str:
@@ -51,7 +52,7 @@ def _reverse_engineer_sigmah(Y_hat_df, y_hat, model_name, uids):
     pi = len(pi_model_name) > 0
 
     if not pi:
-        raise Exception(f'Please include {model_name} prediction intervals in `Y_hat_df`')
+        raise Exception(f'Please include `{model_name}` prediction intervals in `Y_hat_df`')
 
     pi_col = pi_model_name[0]
     sign = -1 if 'lo' in pi_col else 1
@@ -148,6 +149,11 @@ def reconcile(self,
             if Y_diff > 0 or  Y_hat_diff > 0:
                 raise Exception(f'Check `Y_hat_df`, `Y_df` series difference, Y_hat\Y={Y_hat_diff}, Y\Y_hat={Y_diff}')
 
+        # TODO: Complete y_hat_insample protection
+        if intervals_method in ['bootstrap', 'permbu']:
+           if not (set(model_names) <= set(Y_df.columns)):
+               raise Exception('Check `Y_hat_df`, `Y_df` columns difference')
+
         # Same Y_hat_df/S_df/Y_df's unique_id order to prevent errors
         S_ = S.loc[uids]
 
@@ -165,7 +171,7 @@ def reconcile(self,
         start = time.time()
         self.execution_times = {}
         fcsts = Y_hat_df.copy()
-        for reconcile_fn in self.reconcilers:
+        for reconcile_fn in tqdm(self.reconcilers):
             reconcile_fn_name = _build_fn_name(reconcile_fn)
             has_fitted = 'y_hat_insample' in signature(reconcile_fn).parameters
             has_level = 'level' in signature(reconcile_fn).parameters

diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py
@@ -5,6 +5,7 @@
 
 # %% ../nbs/utils.ipynb 2
 import sys
+import timeit
 from itertools import chain
 from typing import Callable, Dict, List, Optional
 
@@ -16,6 +17,21 @@
 plt.rcParams['font.family'] = 'serif'
 
 # %% ../nbs/utils.ipynb 4
+class CodeTimer:
+    def __init__(self, name=None, verbose=True):
+        self.name = " '"  + name + "'" if name else ''
+        self.verbose = verbose
+
+    def __enter__(self):
+        self.start = timeit.default_timer()
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.took = (timeit.default_timer() - self.start)
+        if self.verbose:
+            print('Code block' + self.name + \
+                  ' took:\t{0:.5f}'.format(self.took) + ' seconds')
+
+# %% ../nbs/utils.ipynb 5
 def is_strictly_hierarchical(S: np.ndarray, 
                              tags: Dict[str, np.ndarray]):
     # main idea:
@@ -33,7 +49,7 @@ def is_strictly_hierarchical(S: np.ndarray,
     nodes = levels_.popitem()[1].size
     return paths == nodes
 
-# %% ../nbs/utils.ipynb 5
+# %% ../nbs/utils.ipynb 6
 def cov2corr(cov, return_std=False):
     """ convert covariance matrix to correlation matrix
 
@@ -52,7 +68,7 @@ def cov2corr(cov, return_std=False):
     else:
         return corr
 
-# %% ../nbs/utils.ipynb 7
+# %% ../nbs/utils.ipynb 8
 def _to_summing_matrix(S_df: pd.DataFrame):
     """Transforms the DataFrame `df` of hierarchies to a summing matrix S."""
     categories = [S_df[col].unique() for col in S_df.columns]
@@ -65,7 +81,7 @@ def _to_summing_matrix(S_df: pd.DataFrame):
     tags = dict(zip(S_df.columns, categories))
     return S, tags
 
-# %% ../nbs/utils.ipynb 8
+# %% ../nbs/utils.ipynb 9
 def aggregate(df: pd.DataFrame,
               spec: List[List[str]],
               agg_fn: Callable = np.sum):
@@ -107,7 +123,7 @@ def aggregate(df: pd.DataFrame,
     S, tags = _to_summing_matrix(S_df.loc[bottom_hier, hiers_cols])
     return Y_df, S, tags
 
-# %% ../nbs/utils.ipynb 12
+# %% ../nbs/utils.ipynb 13
 class HierarchicalPlot:
     """ Hierarchical Plot
 

diff --git a/nbs/core.ipynb b/nbs/core.ipynb
@@ -40,7 +40,8 @@
     "from typing import Callable, Dict, List, Optional\n",
     "\n",
     "import numpy as np\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "from tqdm import tqdm"
    ]
   },
   {
@@ -144,7 +145,7 @@
     "    pi = len(pi_model_name) > 0\n",
     "\n",
     "    if not pi:\n",
-    "        raise Exception(f'Please include {model_name} prediction intervals in `Y_hat_df`')\n",
+    "        raise Exception(f'Please include `{model_name}` prediction intervals in `Y_hat_df`')\n",
     "\n",
     "    pi_col = pi_model_name[0]\n",
     "    sign = -1 if 'lo' in pi_col else 1\n",
@@ -248,6 +249,11 @@
     "            if Y_diff > 0 or  Y_hat_diff > 0:\n",
     "                raise Exception(f'Check `Y_hat_df`, `Y_df` series difference, Y_hat\\Y={Y_hat_diff}, Y\\Y_hat={Y_diff}')\n",
     "\n",
+    "        # TODO: Complete y_hat_insample protection\n",
+    "        if intervals_method in ['bootstrap', 'permbu']:\n",
+    "           if not (set(model_names) <= set(Y_df.columns)):\n",
+    "               raise Exception('Check `Y_hat_df`, `Y_df` columns difference')\n",
+    "\n",
     "        # Same Y_hat_df/S_df/Y_df's unique_id order to prevent errors\n",
     "        S_ = S.loc[uids]\n",
     "\n",
@@ -265,7 +271,7 @@
     "        start = time.time()\n",
     "        self.execution_times = {}\n",
     "        fcsts = Y_hat_df.copy()\n",
-    "        for reconcile_fn in self.reconcilers:\n",
+    "        for reconcile_fn in tqdm(self.reconcilers):\n",
     "            reconcile_fn_name = _build_fn_name(reconcile_fn)\n",
     "            has_fitted = 'y_hat_insample' in signature(reconcile_fn).parameters\n",
     "            has_level = 'level' in signature(reconcile_fn).parameters\n",
@@ -459,6 +465,25 @@
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | hide\n",
+    "# test expected error\n",
+    "# different columns Y_df and Y_hat_df\n",
+    "hrec = HierarchicalReconciliation(\n",
+    "            reconcilers=[ERM(method='reg_bu', lambda_reg=100)])\n",
+    "test_fail(\n",
+    "    hrec.reconcile,\n",
+    "    contains='Please include ',\n",
+    "    args=(hier_grouped_hat_df, S_grouped_df, tags_grouped, \n",
+    "          hier_grouped_df, [80], 'permbu'), # permbu needs y_hat_insample\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/nbs/examples/TourismLarge-Evaluation.ipynb b/nbs/examples/TourismLarge-Evaluation.ipynb
@@ -43,7 +43,10 @@
    "id": "690aea05",
    "metadata": {},
    "source": [
-    "## 1. Installing HierarchicalForecast"
+    "## 1. Installing HierarchicalForecast\n",
+    "\n",
+    "We assume you have StatsForecast and HierarchicalForecast already installed, if not \n",
+    "check this guide for instructions on how to install HierarchicalForecast."
    ]
   },
   {
@@ -93,7 +96,7 @@
    "id": "8d002ec0",
    "metadata": {},
    "source": [
-    "### 2.1 Read and Aggregate"
+    "### 2.1 Read Hierarchical Dataset"
    ]
   },
   {
@@ -147,7 +150,10 @@
    "id": "98fec0eb",
    "metadata": {},
    "source": [
-    "### 2.2 StatsForecast's Base Predictions"
+    "### 2.2 StatsForecast's Base Predictions\n",
+    "\n",
+    "This cell computes the base predictions `Y_hat_df` for all the series in `Y_df` using StatsForecast's `AutoARIMA`.\n",
+    "Additionally we obtain insample predictions `Y_fitted_df` for the methods that require them."
    ]
   },
   {
@@ -160,7 +166,10 @@
     "%%capture\n",
     "if os.path.isfile('Y_hat.csv'):\n",
     "    Y_hat_df = pd.read_csv('Y_hat.csv')\n",
+    "    Y_fitted_df = pd.read_csv('Y_fitted.csv')\n",
+    "\n",
     "    Y_hat_df = Y_hat_df.set_index('unique_id')\n",
+    "    Y_fitted_df = Y_fitted_df.set_index('unique_id')\n",
     "else:\n",
     "    fcst = StatsForecast(\n",
     "        df=Y_train_df, \n",
@@ -169,8 +178,10 @@
     "        freq='M', \n",
     "        n_jobs=-1\n",
     "    )\n",
-    "    Y_hat_df = fcst.forecast(h=12, level=[80])\n",
-    "    Y_hat_df.to_csv('Y_hat.csv', index=False)"
+    "    Y_hat_df = fcst.forecast(h=12, fitted=True, level=[80])\n",
+    "    Y_fitted_df = fcst.forecast_fitted_values()\n",
+    "    Y_hat_df.to_csv('Y_hat.csv')\n",
+    "    Y_fitted_df.to_csv('Y_fitted.csv')"
    ]
   },
   {
@@ -183,6 +194,16 @@
     "Y_hat_df"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f325faa1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Y_fitted_df"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e557adc7",
@@ -204,8 +225,9 @@
     "#     ERM(method='reg_bu', lambda_reg=100)\n",
     "# ]\n",
     "# hrec = HierarchicalReconciliation(reconcilers=reconcilers)\n",
-    "# Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df, Y_df=Y_train_df, \n",
-    "#                           S=S_df, tags=tags)"
+    "# Y_rec_df = hrec.reconcile(Y_hat_df=Y_hat_df,\n",
+    "#                           Y_df=Y_fitted_df,\n",
+    "#                           S=S_df, tags=tags, intervals_method='permbu')"
    ]
   },
   {
@@ -230,7 +252,9 @@
    "id": "c0b2c842",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "Y_hat_df['AutoARIMA']"
+   ]
   },
   {
    "cell_type": "markdown",

diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
@@ -32,6 +32,7 @@
    "source": [
     "#| export\n",
     "import sys\n",
+    "import timeit\n",
     "from itertools import chain\n",
     "from typing import Callable, Dict, List, Optional\n",
     "\n",
@@ -55,6 +56,29 @@
     "from nbdev.showdoc import add_docs, show_doc"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a5ba2391",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| exporti\n",
+    "class CodeTimer:\n",
+    "    def __init__(self, name=None, verbose=True):\n",
+    "        self.name = \" '\"  + name + \"'\" if name else ''\n",
+    "        self.verbose = verbose\n",
+    "\n",
+    "    def __enter__(self):\n",
+    "        self.start = timeit.default_timer()\n",
+    "\n",
+    "    def __exit__(self, exc_type, exc_value, traceback):\n",
+    "        self.took = (timeit.default_timer() - self.start)\n",
+    "        if self.verbose:\n",
+    "            print('Code block' + self.name + \\\n",
+    "                  ' took:\\t{0:.5f}'.format(self.took) + ' seconds')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -630,7 +654,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "hierarchicalforecast",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   }

diff --git a/settings.ini b/settings.ini
@@ -15,7 +15,7 @@ language = English
 custom_sidebar = True
 license = apache2
 status = 2
-requirements = numpy numba pandas scikit-learn quadprog matplotlib
+requirements = numpy numba pandas scikit-learn quadprog matplotlib tqdm
 dev_requirements = datasetsforecast statsforecast>=1.0.0 requests
 nbs_path = nbs
 doc_path = _docs
diff --git a/environment.yml b/environment.yml
@@ -11,3 +11,4 @@ dependencies:
       - pip
       - pip:
         - nbdev
+        - tqdm