Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparse Reconciliation #210

Merged
merged 9 commits into from
Jul 13, 2023
8 changes: 8 additions & 0 deletions hierarchicalforecast/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUp.fit_predict': ( 'methods.html#bottomup.fit_predict',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUpSparse': ( 'methods.html#bottomupsparse',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.BottomUpSparse._get_PW_matrices': ( 'methods.html#bottomupsparse._get_pw_matrices',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.ERM': ('methods.html#erm', 'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.ERM.__init__': ( 'methods.html#erm.__init__',
'hierarchicalforecast/methods.py'),
Expand Down Expand Up @@ -90,6 +94,10 @@
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.MinTrace.fit_predict': ( 'methods.html#mintrace.fit_predict',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.MinTraceSparse': ( 'methods.html#mintracesparse',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.MinTraceSparse._get_PW_matrices': ( 'methods.html#mintracesparse._get_pw_matrices',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.OptimalCombination': ( 'methods.html#optimalcombination',
'hierarchicalforecast/methods.py'),
'hierarchicalforecast.methods.OptimalCombination.__init__': ( 'methods.html#optimalcombination.__init__',
Expand Down
18 changes: 17 additions & 1 deletion hierarchicalforecast/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import copy
from inspect import signature
from scipy.stats import norm
from scipy import sparse
from typing import Callable, Dict, List, Optional
import warnings

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -231,10 +233,18 @@ def reconcile(self,

# Initialize reconciler arguments
reconciler_args = dict(
S=S_df.values.astype(np.float32),
idx_bottom=S_df.index.get_indexer(S.columns),
tags={key: S_df.index.get_indexer(val) for key, val in tags.items()}
)

any_sparse = any([method.is_sparse_method for method in self.reconcilers])
if any_sparse:
try:
S_for_sparse = sparse.csr_matrix(S_df.sparse.to_coo())
except AttributeError:
warnings.warn('Using dense S matrix for sparse reconciliation method.')
S_for_sparse = S_df.values.astype(np.float32)

if Y_df is not None:
if is_balanced:
y_insample = Y_df['y'].values.reshape(len(S_df), -1).astype(np.float32)
Expand All @@ -249,6 +259,12 @@ def reconcile(self,
self.sample_names = {}
for reconcile_fn, name_copy in zip(self.reconcilers, self.orig_reconcilers):
reconcile_fn_name = _build_fn_name(name_copy)

if reconcile_fn.is_sparse_method:
reconciler_args["S"] = S_for_sparse
else:
reconciler_args["S"] = S_df.values.astype(np.float32)

has_fitted = 'y_hat_insample' in signature(reconcile_fn).parameters
has_level = 'level' in signature(reconcile_fn).parameters

Expand Down
156 changes: 145 additions & 11 deletions hierarchicalforecast/methods.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/methods.ipynb.

# %% auto 0
__all__ = ['BottomUp', 'TopDown', 'MiddleOut', 'MinTrace', 'OptimalCombination', 'ERM']
__all__ = ['BottomUp', 'BottomUpSparse', 'TopDown', 'MiddleOut', 'MinTrace', 'MinTraceSparse', 'OptimalCombination', 'ERM']

# %% ../nbs/methods.ipynb 3
import warnings
from collections import OrderedDict
from copy import deepcopy
from typing import Callable, Dict, List, Optional
from typing import Callable, Dict, List, Optional, Union

import numpy as np
from numba import njit
from quadprog import solve_qp
from scipy import sparse

# %% ../nbs/methods.ipynb 4
from .utils import is_strictly_hierarchical, cov2corr
Expand All @@ -20,6 +21,7 @@
# %% ../nbs/methods.ipynb 6
class HReconciler:
fitted = False
is_sparse_method = False

def _get_sampler(self,
intervals_method,
Expand Down Expand Up @@ -226,7 +228,29 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 21
# %% ../nbs/methods.ipynb 9
class BottomUpSparse(BottomUp):
    """BottomUpSparse Reconciliation Class.

    Bottom-up reconciliation implemented with sparse matrices. It produces the
    same reconciliation as `BottomUp`, but keeps the projection matrix `P` in
    CSR form so that it remains tractable on hierarchies whose dense summing
    matrix `S` would be too large to hold in memory.

    Note (review): speed-ups over the dense implementation were only verified
    up to roughly 20k time series; the expected benefit for far larger
    hierarchies (e.g. 1M series) is plausible but unconfirmed.

    See the parent class for more details.
    """
    # Signals `core.reconcile` to hand this reconciler a sparse `S` matrix.
    is_sparse_method = True

    def _get_PW_matrices(self, S, idx_bottom):
        """Build the sparse (P, W) pair for bottom-up reconciliation.

        Parameters
        ----------
        S : scipy sparse matrix of shape (n_hiers, n_bottom)
            Summing matrix.
        idx_bottom : array-like of int
            Row indices of the bottom-level series within `S`.

        Returns
        -------
        P : scipy.sparse.csr_matrix of shape (n_bottom, n_hiers)
            Projection that selects the bottom-level forecasts.
        W : sparse identity of shape (n_hiers, n_hiers)
            Bottom-up weighting is the identity matrix.
        """
        n_hiers, _ = S.shape
        # LIL format supports efficient row assignment; convert the transpose
        # to CSR afterwards for fast matrix-vector products.
        P = sparse.lil_matrix(S.shape, dtype=np.float32)
        P[idx_bottom] = S[idx_bottom]
        P = sparse.csr_matrix(P.T)
        W = sparse.identity(n_hiers, dtype=np.float32)
        return P, W

# %% ../nbs/methods.ipynb 22
def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):
level_names = list(tags.keys())
nodes = OrderedDict()
Expand All @@ -244,7 +268,7 @@ def _get_child_nodes(S: np.ndarray, tags: Dict[str, np.ndarray]):
nodes[level] = nodes_level
return nodes

# %% ../nbs/methods.ipynb 22
# %% ../nbs/methods.ipynb 23
def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,
tags: Dict[str, np.ndarray],
nodes: Dict[str, Dict[int, np.ndarray]],
Expand All @@ -261,7 +285,7 @@ def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,
reconciled[idx_child] = y_hat[idx_child] * fcst_parent / childs_sum
return reconciled

# %% ../nbs/methods.ipynb 23
# %% ../nbs/methods.ipynb 24
class TopDown(HReconciler):
"""Top Down Reconciliation Class.

Expand Down Expand Up @@ -412,7 +436,7 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 31
# %% ../nbs/methods.ipynb 32
class MiddleOut(HReconciler):
"""Middle Out Reconciliation Class.

Expand Down Expand Up @@ -528,11 +552,11 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 36
# %% ../nbs/methods.ipynb 37
def crossprod(x):
    """Return the cross-product matrix `x.T @ x` of `x`."""
    transposed = x.T
    return transposed @ x

# %% ../nbs/methods.ipynb 37
# %% ../nbs/methods.ipynb 38
class MinTrace(HReconciler):
"""MinTrace Reconciliation Class.

Expand Down Expand Up @@ -773,7 +797,117 @@ def fit_predict(self,

__call__ = fit_predict

# %% ../nbs/methods.ipynb 47
# %% ../nbs/methods.ipynb 39
class MinTraceSparse(MinTrace):
    """MinTraceSparse Reconciliation Class.

    Implementation of a subset of MinTrace features using the sparse matrix
    approach. It is considerably more efficient on datasets with many time
    series.

    See the parent class for more details.

    Currently supported:
    * Methods using a diagonal W matrix, i.e. "ols", "wls_struct", "wls_var",
    * The standard MinT version (non-negative is not supported).

    Note: due to the numerical instability of the matrix inversion when creating the
    P matrix, the method is NOT guaranteed to give identical results to the non-sparse
    version.
    """
    # Signals `core.reconcile` to hand this reconciler a sparse `S` matrix.
    is_sparse_method = True

    def _get_PW_matrices(
        self,
        S: Union[np.ndarray, sparse.spmatrix],
        y_hat: np.ndarray,
        y_insample: Optional[np.ndarray] = None,
        y_hat_insample: Optional[np.ndarray] = None,
        idx_bottom: Optional[List[int]] = None,
    ):
        """Build the (P, W) pair for sparse MinTrace reconciliation.

        Parameters
        ----------
        S : dense array or sparse matrix of shape (n_hiers, n_bottom)
            Summing matrix; converted to CSR internally.
        y_hat : np.ndarray
            Base forecasts; only its leading dimension (n_hiers) is used here.
        y_insample, y_hat_insample : np.ndarray, optional
            In-sample values and fitted values, shape (n_hiers, obs);
            required for the "wls_var" method.
        idx_bottom : list of int, optional
            Unused; kept for interface compatibility with the parent class.

        Returns
        -------
        P : scipy.sparse.linalg.LinearOperator of shape (n_bottom, n_hiers)
            Applies the MinTrace projection by iteratively solving
            (S' W^-1 S) x = S' W^-1 y with BiCGSTAB.
        W : sparse diagonal matrix of shape (n_hiers, n_hiers)
            Diagonal weights selected by ``self.method``.
        """
        # shape residuals_insample (n_hiers, obs)
        res_methods = ["wls_var", "mint_cov", "mint_shrink"]
        diag_only_methods = ["ols", "wls_struct", "wls_var"]

        if self.method not in diag_only_methods:
            raise NotImplementedError(
                "Only the methods with diagonal W are supported as sparse operations"
            )

        if self.nonnegative:
            raise NotImplementedError(
                "Non-negative MinT is currently not implemented as sparse"
            )

        S = sparse.csr_matrix(S)

        # `or` (not `and`): a residual-based method needs BOTH arrays; with
        # only one present the subtraction below would raise a cryptic
        # TypeError instead of this explicit error.
        if self.method in res_methods and (y_insample is None or y_hat_insample is None):
            raise ValueError(
                f"For methods {', '.join(res_methods)} you need to pass residuals"
            )
        n_hiers, n_bottom = S.shape

        if self.method == "ols":
            W_diag = np.ones(n_hiers)
        elif self.method == "wls_struct":
            # Row sums of S: number of bottom series aggregated at each node.
            W_diag = S @ np.ones((n_bottom,))
        elif self.method == "wls_var":
            # Residuals with shape (obs, n_hiers)
            residuals = (y_insample - y_hat_insample).T
            n, _ = residuals.shape

            # Protection: against overfitted model
            residuals_sum = np.sum(residuals, axis=0)
            zero_residual_prc = np.abs(residuals_sum) < 1e-4
            zero_residual_prc = np.mean(zero_residual_prc)
            if zero_residual_prc > 0.98:
                raise Exception(
                    f"Insample residuals close to 0, zero_residual_prc={zero_residual_prc}. Check `Y_df`"
                )

            # NOTE(review): nan-masking (np.ma.cov on masked residuals) was
            # deliberately removed here — in the presence of nan-s it often
            # produced rubbish results; failing fast on a nan variance (checked
            # below) is preferable to silently reconciling with garbage.
            W_diag = np.var(residuals, axis=0, ddof=1)
        else:
            raise ValueError(f"Unknown reconciliation method {self.method}")

        if any(W_diag < 1e-8):
            raise Exception(
                f"min_trace ({self.method}) needs covariance matrix to be positive definite."
            )

        if any(np.isnan(W_diag)):
            raise Exception(
                f"min_trace ({self.method}) needs covariance matrix to be positive definite (not nan)."
            )

        M = sparse.spdiags(np.reciprocal(W_diag), 0, W_diag.size, W_diag.size)
        R = sparse.csr_matrix(S.T @ M)

        # A = S' W^-1 S does not depend on the vector P acts on, so build its
        # operator once instead of on every application of P.
        A = sparse.linalg.LinearOperator(
            (S.shape[1], S.shape[1]), matvec=lambda v: R @ (S @ v)
        )

        # The implementation of P acting on a vector:
        def get_P_action(y):
            b = R @ y
            # atol=0.0 reproduces the pre-SciPy-1.12 "legacy" stopping rule
            # (purely relative tolerance, ||r|| <= tol * ||b||) and remains
            # valid on newer SciPy, where the string value "legacy" was
            # removed. The exit code is ignored, matching the previous
            # best-effort behavior on non-convergence.
            x_tilde, _ = sparse.linalg.bicgstab(A, b, atol=0.0)
            return x_tilde

        P = sparse.linalg.LinearOperator(
            (S.shape[1], y_hat.shape[0]), matvec=get_P_action
        )
        W = sparse.spdiags(W_diag, 0, W_diag.size, W_diag.size)

        return P, W

# %% ../nbs/methods.ipynb 49
class OptimalCombination(MinTrace):
"""Optimal Combination Reconciliation Class.

Expand Down Expand Up @@ -808,7 +942,7 @@ def __init__(self,
self.nonnegative = nonnegative
self.insample = False

# %% ../nbs/methods.ipynb 56
# %% ../nbs/methods.ipynb 58
@njit
def lasso(X: np.ndarray, y: np.ndarray,
lambda_reg: float, max_iters: int = 1_000,
Expand Down Expand Up @@ -840,7 +974,7 @@ def lasso(X: np.ndarray, y: np.ndarray,
#print(it)
return beta

# %% ../nbs/methods.ipynb 57
# %% ../nbs/methods.ipynb 59
class ERM(HReconciler):
"""Optimal Combination Reconciliation Class.

Expand Down
Loading