From 187ff6b68cbdc8932231162b76a49705e2796531 Mon Sep 17 00:00:00 2001
From: Federico Tomasi
Date: Tue, 4 Jul 2023 10:12:47 +0200
Subject: [PATCH 1/4] support Python 3.10

---
 .github/workflows/python-package.yml       |  2 +-
 regain/covariance/time_graphical_lasso_.py | 98 +++++++++++++++++-----
 2 files changed, 78 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index fff1658..17c1113 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.7", "3.8" ]
+        python-version: [ "3.8", "3.9", "3.10" ]
 
     steps:
       - uses: actions/checkout@v3
diff --git a/regain/covariance/time_graphical_lasso_.py b/regain/covariance/time_graphical_lasso_.py
index 5743e2f..27bdc45 100644
--- a/regain/covariance/time_graphical_lasso_.py
+++ b/regain/covariance/time_graphical_lasso_.py
@@ -43,7 +43,8 @@
 from sklearn.utils.extmath import squared_norm
 from sklearn.utils.validation import check_X_y
 
-from regain.covariance.graphical_lasso_ import GraphicalLasso, init_precision, logl
+from regain.covariance.graphical_lasso_ import GraphicalLasso, init_precision
+from regain.math import batch_logdet
 from regain.norm import l1_od_norm
 from regain.prox import prox_logdet, soft_thresholding
 from regain.update_rules import update_rho
@@ -51,21 +52,29 @@
 from regain.validation import check_norm_prox
 
 
-def loss(S, K, n_samples=None):
+def batch_log_likelihood(emp_cov, precision):
+    """Gaussian log-likelihood without constant term."""
+    return batch_logdet(precision) - np.sum(emp_cov * precision, axis=(-1, -2))
+
+
+def loss(emp_cov, precision, n_samples=None):
     """Loss function for time-varying graphical lasso."""
     if n_samples is None:
-        n_samples = np.ones(S.shape[0])
-    return sum(-ni * logl(emp_cov, precision) for emp_cov, precision, ni in zip(S, K, n_samples))
+        # 1 sample for each batch, i.e., do not scale.
+        batch_dim = emp_cov.shape[0]
+        n_samples = np.ones(batch_dim)
+    return np.sum(-n_samples * batch_log_likelihood(emp_cov, precision))
 
 
-def objective(n_samples, S, K, Z_0, Z_1, Z_2, alpha, beta, psi):
+
+def objective(n_samples, emp_cov, K, Z_0, Z_1, Z_2, alpha, beta, psi):
     """Objective function for time-varying graphical lasso."""
-    obj = loss(S, K, n_samples=n_samples)
+    obj = loss(emp_cov, K, n_samples=n_samples)
 
     if isinstance(alpha, np.ndarray):
-        obj += sum(l1_od_norm(a * z) for a, z in zip(alpha, Z_0))
+        obj += np.sum(l1_od_norm(alpha * Z_0))
     else:
-        obj += alpha * sum(map(l1_od_norm, Z_0))
+        obj += alpha * np.sum(l1_od_norm(Z_0))
 
     if isinstance(beta, np.ndarray):
         obj += sum(b[0][0] * m for b, m in zip(beta, map(psi, Z_2 - Z_1)))
@@ -167,7 +176,11 @@ def time_graphical_lasso(
     if n_samples is None:
         n_samples = np.ones(emp_cov.shape[0])
 
-    checks = [convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi))]
+    checks = [
+        convergence(
+            obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi)
+        )
+    ]
     for iteration_ in range(max_iter):
         # update K
         A = Z_0 - U_0
@@ -182,7 +195,8 @@ def time_graphical_lasso(
         A *= -rho * divisor[:, None, None] / n_samples[:, None, None]
         A += emp_cov
 
-        K = np.array([prox_logdet(a, lamda=ni / (rho * div)) for a, div, ni in zip(A, divisor, n_samples)])
+        lamda = np.expand_dims(n_samples / (rho * divisor), -1)
+        K = prox_logdet(A, lamda)
 
         # update Z_0
         A = K + U_0
@@ -213,11 +227,23 @@ def time_graphical_lasso(
             U_2 += K[1:] - Z_2
 
         # diagnostics, reporting, termination checks
-        rnorm = np.sqrt(squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))
+        rnorm = np.sqrt(
+            squared_norm(K - Z_0)
+            + squared_norm(K[:-1] - Z_1)
+            + squared_norm(K[1:] - Z_2)
+        )
 
-        snorm = rho * np.sqrt(squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old))
+        snorm = rho * np.sqrt(
+            squared_norm(Z_0 - Z_0_old)
+            + squared_norm(Z_1 - Z_1_old)
+            + squared_norm(Z_2 - Z_2_old)
+        )
 
-        obj = objective(n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) if compute_objective else np.nan
+        obj = (
+            objective(n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi)
+            if compute_objective
+            else np.nan
+        )
 
         # if np.isinf(obj):
         #     Z_0 = Z_0_old
@@ -234,7 +260,9 @@ def time_graphical_lasso(
                 np.sqrt(squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:])),
             ),
             e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol
-            + rtol * rho * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
+            + rtol
+            * rho
+            * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
             # precision=Z_0.copy()
         )
         Z_0_old = Z_0.copy()
@@ -242,7 +270,10 @@ def time_graphical_lasso(
         Z_2_old = Z_2.copy()
 
         if verbose:
-            print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5])
+            print(
+                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
+                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
+            )
 
         checks.append(check)
         if stop_at is not None:
@@ -252,7 +283,9 @@ def time_graphical_lasso(
         if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
             break
 
-        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {}))
+        rho_new = update_rho(
+            rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})
+        )
         # scaled dual variables should be also rescaled
         U_0 *= rho / rho_new
         U_1 *= rho / rho_new
@@ -441,7 +474,15 @@ def fit(self, X, y):
 
         """
        # Covariance does not make sense for a single feature
-        X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float64, order="C", ensure_min_features=2, estimator=self)
+        X, y = check_X_y(
+            X,
+            y,
+            accept_sparse=False,
+            dtype=np.float64,
+            order="C",
+            ensure_min_features=2,
+            estimator=self,
+        )
 
         n_dimensions = X.shape[1]
         self.classes_, n_samples = np.unique(y, return_counts=True)
@@ -454,7 +495,10 @@ def fit(self, X, y):
 
         self.location_ = np.array([X[y == cl].mean(0) for cl in self.classes_])
         emp_cov = np.array(
-            [empirical_covariance(X[y == cl], assume_centered=self.assume_centered) for cl in self.classes_]
+            [
+                empirical_covariance(X[y == cl], assume_centered=self.assume_centered)
+                for cl in self.classes_
+            ]
         )
 
         return self._fit(emp_cov, n_samples)
@@ -482,12 +526,22 @@ def score(self, X, y):
 
         """
         # Covariance does not make sense for a single feature
-        X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float64, order="C", ensure_min_features=2, estimator=self)
+        X, y = check_X_y(
+            X,
+            y,
+            accept_sparse=False,
+            dtype=np.float64,
+            order="C",
+            ensure_min_features=2,
+            estimator=self,
+        )
 
         # compute empirical covariance of the test set
         test_cov = np.array(
             [
-                empirical_covariance(X[y == cl] - self.location_[i], assume_centered=True)
+                empirical_covariance(
+                    X[y == cl] - self.location_[i], assume_centered=True
+                )
                 for i, cl in enumerate(self.classes_)
             ]
         )
@@ -529,4 +583,6 @@ def error_norm(self, comp_cov, norm="frobenius", scaling=True, squared=True):
             `self` and `comp_cov` covariance estimators.
 
         """
-        return error_norm_time(self.covariance_, comp_cov, norm=norm, scaling=scaling, squared=squared)
+        return error_norm_time(
+            self.covariance_, comp_cov, norm=norm, scaling=scaling, squared=squared
+        )

From 15997cb5c6ac4478a9a46192fa2f85ffd36af588 Mon Sep 17 00:00:00 2001
From: Federico Tomasi
Date: Tue, 4 Jul 2023 10:14:13 +0200
Subject: [PATCH 2/4] support Python 3.10

---
 regain/covariance/time_graphical_lasso_.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/regain/covariance/time_graphical_lasso_.py b/regain/covariance/time_graphical_lasso_.py
index d71fd31..27bdc45 100644
--- a/regain/covariance/time_graphical_lasso_.py
+++ b/regain/covariance/time_graphical_lasso_.py
@@ -254,15 +254,15 @@ def time_graphical_lasso(
             rnorm=rnorm,
             snorm=snorm,
             e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol
-                + rtol
-                * max(
+            + rtol
+            * max(
                 np.sqrt(squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                 np.sqrt(squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:])),
             ),
             e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol
-                + rtol
-                * rho
-                * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
+            + rtol
+            * rho
+            * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
             # precision=Z_0.copy()
         )
         Z_0_old = Z_0.copy()
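Note on the two patches above: they replace the per-time-point Python loop in `loss` with a single batched evaluation over the leading axis. A minimal sketch of the new call, assuming the patched `regain` is importable (the `batch_logdet` helper in `regain.math` is the one referenced by PATCH 1; the random data is illustrative only):

    import numpy as np

    from regain.covariance.time_graphical_lasso_ import loss

    rng = np.random.default_rng(0)
    n_times, n_dim = 4, 5
    # batch of symmetric positive definite empirical covariances
    A = rng.normal(size=(n_times, n_dim, n_dim))
    emp_cov = A @ A.transpose(0, 2, 1) + n_dim * np.eye(n_dim)
    # one precision matrix per time point
    precision = np.array([np.eye(n_dim)] * n_times)

    # single vectorized call; previously this looped over zip(S, K, n_samples)
    print(loss(emp_cov, precision, n_samples=np.full(n_times, 10.0)))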
From 4eeff4eeb09c32a4311cf92631133b286cd21316 Mon Sep 17 00:00:00 2001
From: Federico Tomasi
Date: Tue, 4 Jul 2023 11:00:33 +0200
Subject: [PATCH 3/4] remove custom namedtuple and use dataclasses

---
 regain/bayesian/gwishart_inference.py         |  59 +++++++---
 regain/bayesian/wishart_process_.py           |  10 +-
 regain/covariance/graphical_lasso_.py         |   9 +-
 regain/covariance/infimal_convolution_.py     |  13 ++-
 .../kernel_latent_time_graphical_lasso_.py    |   9 +-
 .../kernel_time_graphical_lasso_.py           |  11 +-
 regain/covariance/latent_graphical_lasso_.py  |  28 +++--
 .../latent_time_graphical_lasso_.py           |   9 +-
 .../latent_time_matrix_decomposition.py       |   9 +-
 regain/covariance/missing_graphical_lasso_.py |  16 ++-
 regain/covariance/time_graphical_lasso_.py    |  18 ++-
 .../forward_backward/time_graphical_lasso_.py |  11 +-
 regain/generalized_linear_model/base.py       |  16 ++-
 .../generalized_linear_model/glm_gaussian.py  |  11 +-
 regain/generalized_linear_model/glm_ising.py  |  11 +-
 .../generalized_linear_model/glm_poisson.py   |  12 +-
 .../glm_time_ising.py                         |  12 +-
 .../glm_time_poisson.py                       |  11 +-
 regain/prox.py                                |   4 +-
 regain/utils.py                               | 103 +++++++++++++-----
 20 files changed, 227 insertions(+), 155 deletions(-)

diff --git a/regain/bayesian/gwishart_inference.py b/regain/bayesian/gwishart_inference.py
index 1f9c0c4..e6bd164 100755
--- a/regain/bayesian/gwishart_inference.py
+++ b/regain/bayesian/gwishart_inference.py
@@ -39,8 +39,7 @@
 from scipy import linalg
 from scipy.optimize import minimize
 from scipy.special import comb
-from sklearn.covariance import empirical_covariance
-from sklearn.covariance.empirical_covariance_ import log_likelihood
+from sklearn.covariance import empirical_covariance, log_likelihood
 from sklearn.linear_model import LassoLars
 from sklearn.utils import Bunch, check_array
 from sklearn.utils.extmath import fast_logdet
@@ -51,9 +50,11 @@
 def mk_all_ugs(n_dim):
     """Utility for generating all possible graphs."""
     nedges = int(comb(n_dim, 2))
-    m = 2 ** nedges
+    m = 2**nedges
 
-    ind = np.array([list(binary_repr(x, width=len(binary_repr(m - 1)))) for x in range(m)]).astype(int)
+    ind = np.array(
+        [list(binary_repr(x, width=len(binary_repr(m - 1)))) for x in range(m)]
+    ).astype(int)
     ord = np.argsort(ind.sum(axis=1))
     ind = ind[ord]
 
@@ -98,7 +99,12 @@ def score_blankets(blankets, X, alphas=(0.01, 0.5, 1)):
                 X_mb = np.zeros((X.shape[0], 1))
             y_mb = X[:, i]
 
-            score = np.sum([LassoLars(alpha=alpha).fit(X_mb, y_mb).score(X_mb, y_mb) for alpha in alphas])
+            score = np.sum(
+                [
+                    LassoLars(alpha=alpha).fit(X_mb, y_mb).score(X_mb, y_mb)
+                    for alpha in alphas
+                ]
+            )
             scores.append(score)
 
         scores_all.append(scores)
@@ -109,7 +115,12 @@ def score_blankets(blankets, X, alphas=(0.01, 0.5, 1)):
 
 
 def _get_graphs(blankets, scores, n_dim, n_resampling=200):
-    idx = np.array([np.random.choice(scores.shape[1], p=scores[i], size=n_resampling) for i in range(n_dim)])
+    idx = np.array(
+        [
+            np.random.choice(scores.shape[1], p=scores[i], size=n_resampling)
+            for i in range(n_dim)
+        ]
+    )
     graphs_ = np.array([blankets[i][idx[i]] for i in range(n_dim)]).transpose(1, 0, 2)
 
     # symmetrise with AND operator -> product
@@ -268,7 +279,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method="bic"):
         logdetHdiag = sum(np.log(-diagH))
         lognormconst = dof * np.log(2 * np.pi) / 2 + logh - logdetHdiag / 2.0
-        score = lognormconst - GWprior.lognormconst - n_samples * n_dim * np.log(2 * np.pi) / 2
+        score = (
+            lognormconst
+            - GWprior.lognormconst
+            - n_samples * n_dim * np.log(2 * np.pi) / 2
+        )
         GWpost.lognormconst = lognormconst
 
     elif score_method == "laplace":
@@ -294,7 +309,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method="bic"):
         # neg Hessian will be posdef
         logdetH = 2 * sum(np.log(np.diag(linalg.cholesky(-H))))
         lognormconst = dof * np.log(2 * np.pi) / 2 + logh - logdetH / 2.0
-        score = lognormconst - GWprior.lognormconst - n_samples * n_dim * np.log(2 * np.pi) / 2
+        score = (
+            lognormconst
+            - GWprior.lognormconst
+            - n_samples * n_dim * np.log(2 * np.pi) / 2
+        )
         GWpost.lognormconst = lognormconst
 
     GWpost.score = score
@@ -315,7 +334,9 @@ def GWishartScore(X, G, d0=3, S0=None, score_method="bic", mode="covsel"):
 
     noData = np.zeros((0, n_dim))
     P0, S_noData = GWishartFit(noData, G, GWprior)
-    GWtemp = compute_score(noData, G, P0, S_noData, GWprior=GWprior, score_method=score_method)
+    GWtemp = compute_score(
+        noData, G, P0, S_noData, GWprior=GWprior, score_method=score_method
+    )
     GWprior.lognormconst = GWtemp.lognormconst
 
     # Compute the map precision matrix P
@@ -344,13 +365,17 @@ def bayesian_graphical_lasso(
         alphas = np.logspace(-2, 0, 20)
 
     # get a series of Markov blankets for various alphas
-    mdl = GraphicalLasso(assume_centered=assume_centered, tol=tol, max_iter=max_iter, verbose=False)
+    mdl = GraphicalLasso(
+        assume_centered=assume_centered, tol=tol, max_iter=max_iter, verbose=False
+    )
     precisions = [mdl.set_params(alpha=a).fit(X).precision_ for a in alphas]
 
     mblankets = markov_blankets(precisions, tol=tol, unique=True)
 
     normalized_scores = score_blankets(mblankets, X=X, alphas=[0.01, 0.5, 1])
 
-    graphs = _get_graphs(mblankets, normalized_scores, n_dim=n_dim, n_resampling=n_resampling)
+    graphs = _get_graphs(
+        mblankets, normalized_scores, n_dim=n_dim, n_resampling=n_resampling
+    )
 
     nonzeros_all = [np.triu(g, 1) + np.eye(n_dim, dtype=bool) for g in graphs]
 
@@ -361,7 +386,10 @@ def bayesian_graphical_lasso(
     # Find non-zero elements of upper triangle of G
     # make sure diagonal is non-zero
    # G = nonzeros_all[1]  # probably can discard if all zeros?
-    res = [GWishartScore(X, G, d0=d0, S0=S0, mode=mode, score_method=scoring) for G in nonzeros_all]
+    res = [
+        GWishartScore(X, G, d0=d0, S0=S0, mode=mode, score_method=scoring)
+        for G in nonzeros_all
+    ]
 
     top_n = [x.P for x in sorted(res, key=lambda x: x.score)[::-1][:top_n]]
     return np.mean(top_n, axis=0)
@@ -439,7 +467,12 @@ def __init__(
         top_n=1,
     ):
         super(GraphicalLasso, self).__init__(
-            alpha=alpha, tol=tol, max_iter=max_iter, verbose=verbose, assume_centered=assume_centered, mode=mode
+            alpha=alpha,
+            tol=tol,
+            max_iter=max_iter,
+            verbose=verbose,
+            assume_centered=assume_centered,
+            mode=mode,
         )
         self.alphas = alphas
         self.n_resampling = n_resampling
diff --git a/regain/bayesian/wishart_process_.py b/regain/bayesian/wishart_process_.py
index 08f54db..7713dde 100644
--- a/regain/bayesian/wishart_process_.py
+++ b/regain/bayesian/wishart_process_.py
@@ -34,9 +34,9 @@
 import numpy as np
 from scipy import linalg, stats
 from sklearn.covariance import empirical_covariance
-from sklearn.datasets.base import Bunch
 from sklearn.gaussian_process import kernels
 from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.utils import Bunch
 from sklearn.utils.validation import check_X_y
 from tqdm import trange
 
@@ -333,9 +333,9 @@ def fit(self, X, y):
         samples_u, loglikes, lps = out
 
         # Burn in
-        self.lps_after_burnin = lps[self.burn_in :]
-        self.samples_u_after_burnin = samples_u[self.burn_in :]
-        self.loglikes_after_burnin = loglikes[self.burn_in :]
+        self.lps_after_burnin = lps[self.burn_in:]
+        self.samples_u_after_burnin = samples_u[self.burn_in:]
+        self.loglikes_after_burnin = loglikes[self.burn_in:]
 
         # % Select the best hyperparameters based on the loglikes_after_burnin
         pos = np.argmax(self.loglikes_after_burnin)
@@ -343,7 +343,7 @@ def fit(self, X, y):
         self.u_map = self.samples_u_after_burnin[pos]
 
         if self.learn_ell:
-            self.Ls_after_burnin = Ls[self.burn_in :]
+            self.Ls_after_burnin = Ls[self.burn_in:]
             self.Lmap = self.Ls_after_burnin[pos]
         else:
             self.Lmap = L
diff --git a/regain/covariance/graphical_lasso_.py b/regain/covariance/graphical_lasso_.py
index e663876..bbabca1 100644
--- a/regain/covariance/graphical_lasso_.py
+++ b/regain/covariance/graphical_lasso_.py
@@ -44,7 +44,7 @@
 from regain.norm import l1_od_norm
 from regain.prox import prox_logdet, soft_thresholding_off_diagonal
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 
 
 def logl(emp_cov, precision):
@@ -169,7 +169,7 @@ def graphical_lasso(
         obj = objective(emp_cov, K, Z, alpha) if compute_objective else np.nan
         rnorm = np.linalg.norm(K - Z, "fro")
         snorm = rho * np.linalg.norm(Z - Z_old, "fro")
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -180,10 +180,7 @@ def graphical_lasso(
         Z_old = Z.copy()
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
diff --git a/regain/covariance/infimal_convolution_.py b/regain/covariance/infimal_convolution_.py
index 8d78907..63df391 100644
--- a/regain/covariance/infimal_convolution_.py
+++ b/regain/covariance/infimal_convolution_.py
@@ -40,7 +40,7 @@
 from regain.norm import l1_od_norm
 from regain.prox import prox_laplacian, prox_trace_indicator, soft_thresholding
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 
 
 def objective(S, R, K, L, alpha, tau):
@@ -136,24 +136,27 @@ def infimal_convolution(
         obj = objective(S, R, K, L, alpha, tau) if compute_objective else np.nan
         rnorm = np.linalg.norm(R - K + L)
         snorm = rho * np.linalg.norm(R - R_old)
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
-            e_pri=np.sqrt(R.size) * tol + rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
+            e_pri=np.sqrt(R.size) * tol
+            + rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
             e_dual=np.sqrt(R.size) * tol + rtol * rho * np.linalg.norm(U),
         )
         R_old = R.copy()
 
         if verbose:
-            print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5])
+            print(check)
 
         checks.append(check)
         if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
             break
         if check.obj == np.inf:
             break
-        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {}))
+        rho_new = update_rho(
+            rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})
+        )
         # scaled dual variables should be also rescaled
         U *= rho / rho_new
         rho = rho_new
diff --git a/regain/covariance/kernel_latent_time_graphical_lasso_.py b/regain/covariance/kernel_latent_time_graphical_lasso_.py
index 9fb8ef5..c15f569 100644
--- a/regain/covariance/kernel_latent_time_graphical_lasso_.py
+++ b/regain/covariance/kernel_latent_time_graphical_lasso_.py
@@ -49,7 +49,7 @@
 )
 from regain.prox import prox_logdet, prox_trace_indicator, soft_thresholding
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 from regain.validation import check_norm_prox
 
 
@@ -311,7 +311,7 @@ def kernel_latent_time_graphical_lasso(
             else np.nan
         )
 
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -360,10 +360,7 @@ def kernel_latent_time_graphical_lasso(
             W_M_old[m] = (W_M[m][0].copy(), W_M[m][1].copy())
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
diff --git a/regain/covariance/kernel_time_graphical_lasso_.py b/regain/covariance/kernel_time_graphical_lasso_.py
index e52c6c1..9635e3e 100644
--- a/regain/covariance/kernel_time_graphical_lasso_.py
+++ b/regain/covariance/kernel_time_graphical_lasso_.py
@@ -50,7 +50,7 @@
 from regain.norm import l1_od_norm
 from regain.prox import prox_logdet, soft_thresholding
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 from regain.validation import check_norm_prox
 
 
@@ -163,7 +163,7 @@ def kernel_time_graphical_lasso(
         n_samples = np.ones(n_times)
 
     checks = [
-        convergence(
+        Convergence(
             obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha, kernel, psi)
         )
     ]
@@ -245,7 +245,7 @@ def kernel_time_graphical_lasso(
             else np.nan
         )
 
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -283,10 +283,7 @@ def kernel_time_graphical_lasso(
             Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if stop_at is not None:
diff --git a/regain/covariance/latent_graphical_lasso_.py b/regain/covariance/latent_graphical_lasso_.py
index 65a13df..e33d0ae 100644
--- a/regain/covariance/latent_graphical_lasso_.py
+++ b/regain/covariance/latent_graphical_lasso_.py
@@ -34,13 +34,15 @@
 
 import numpy as np
 from scipy import linalg
-from six.moves import range
 
-from regain.covariance.graphical_lasso_ import GraphicalLasso, init_precision
-from regain.covariance.graphical_lasso_ import objective as obj_gl
+from regain.covariance.graphical_lasso_ import (
+    GraphicalLasso,
+    init_precision,
+    objective as obj_gl,
+)
 from regain.prox import prox_logdet, prox_trace_indicator, soft_thresholding
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 
 
 def objective(emp_cov, R, K, L, alpha, tau):
@@ -144,24 +146,27 @@ def latent_graphical_lasso(
         obj = objective(emp_cov, R, K, L, alpha, tau) if compute_objective else np.nan
         rnorm = np.linalg.norm(R - K + L)
         snorm = rho * np.linalg.norm(R - R_old)
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
-            e_pri=np.sqrt(R.size) * tol + rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
+            e_pri=np.sqrt(R.size) * tol
+            + rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
             e_dual=np.sqrt(R.size) * tol + rtol * rho * np.linalg.norm(U),
         )
         R_old = R.copy()
 
         if verbose:
-            print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5])
+            print(check)
 
         checks.append(check)
         if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
             break
         if check.obj == np.inf:
             break
-        rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {}))
+        rho_new = update_rho(
+            rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})
+        )
         # scaled dual variables should be also rescaled
         U *= rho / rho_new
         rho = rho_new
@@ -293,7 +298,12 @@ def _fit(self, emp_cov):
             Empirical covariance of data.
""" - self.precision_, self.latent_, self.covariance_, self.n_iter_ = latent_graphical_lasso( + ( + self.precision_, + self.latent_, + self.covariance_, + self.n_iter_, + ) = latent_graphical_lasso( emp_cov, alpha=self.alpha, tau=self.tau, diff --git a/regain/covariance/latent_time_graphical_lasso_.py b/regain/covariance/latent_time_graphical_lasso_.py index cbd17e4..a8a6658 100644 --- a/regain/covariance/latent_time_graphical_lasso_.py +++ b/regain/covariance/latent_time_graphical_lasso_.py @@ -41,7 +41,7 @@ ) from regain.prox import prox_logdet, prox_trace_indicator, soft_thresholding from regain.update_rules import update_rho -from regain.utils import convergence +from regain.utils import Convergence from regain.validation import check_norm_prox @@ -277,7 +277,7 @@ def latent_time_graphical_lasso( else np.nan ) - check = convergence( + check = Convergence( obj=obj, rnorm=rnorm, snorm=snorm, @@ -320,10 +320,7 @@ def latent_time_graphical_lasso( W_2_old = W_2.copy() if verbose: - print( - "obj: %.4f, rnorm: %.4f, snorm: %.4f," - "eps_pri: %.4f, eps_dual: %.4f" % check[:5] - ) + print(check) checks.append(check) if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: diff --git a/regain/covariance/latent_time_matrix_decomposition.py b/regain/covariance/latent_time_matrix_decomposition.py index caba74e..c668df6 100644 --- a/regain/covariance/latent_time_matrix_decomposition.py +++ b/regain/covariance/latent_time_matrix_decomposition.py @@ -40,7 +40,7 @@ from regain.norm import l1_od_norm from regain.prox import prox_trace_indicator, soft_thresholding from regain.update_rules import update_rho -from regain.utils import convergence +from regain.utils import Convergence from regain.validation import check_input, check_norm_prox @@ -247,7 +247,7 @@ def latent_time_matrix_decomposition( else np.nan ) - check = convergence( + check = Convergence( obj=obj, rnorm=rnorm, snorm=snorm, @@ -290,10 +290,7 @@ def latent_time_matrix_decomposition( W_2_old = W_2.copy() if verbose: - print( - "obj: %.4f, rnorm: %.4f, snorm: %.4f," - "eps_pri: %.4f, eps_dual: %.4f" % check - ) + print(check) checks.append(check) if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: diff --git a/regain/covariance/missing_graphical_lasso_.py b/regain/covariance/missing_graphical_lasso_.py index c7e4403..8596170 100644 --- a/regain/covariance/missing_graphical_lasso_.py +++ b/regain/covariance/missing_graphical_lasso_.py @@ -37,7 +37,6 @@ import warnings import numpy as np -from six.moves import range from regain.covariance.graphical_lasso_ import GraphicalLasso, graphical_lasso, logl @@ -52,7 +51,9 @@ def compute_empirical_covariance(X, K, cs): if np.isnan(X[i, v]) and np.isnan(X[i, s]): nans = np.where(np.isnan(X[i, :]))[0] xxm, yym = np.meshgrid(nans, nans) - inv = np.linalg.pinv(K[xxm, yym])[np.where(nans == v)[0][0], np.where(nans == s)[0][0]] + inv = np.linalg.pinv(K[xxm, yym])[ + np.where(nans == v)[0][0], np.where(nans == s)[0][0] + ] emp_cov[i, v, s] = inv + cs[i, v] * cs[i, s] else: emp_cov[i, v, s] = aux[i, v] * aux[i, s] @@ -176,7 +177,9 @@ def missing_graphical_lasso( diff = old_logl - loglik checks.append(dict(iteration=iter_, log_likelihood=logl, difference=diff)) if verbose: - print("Iter %d: log-likelihood %.4f, difference: %.4f" % (iter_, loglik, diff)) + print( + "Iter %d: log-likelihood %.4f, difference: %.4f" % (iter_, loglik, diff) + ) if np.abs(diff) < tol: break else: @@ -296,7 +299,12 @@ def fit(self, X, y=None): # X = check_array( # X, ensure_min_features=2, ensure_min_samples=2, 
         #     X, ensure_min_features=2, ensure_min_samples=2, estimator=self)
 
-        self.precision_, self.covariance_, self.complete_data_matrix_, self.n_iter_ = missing_graphical_lasso(
+        (
+            self.precision_,
+            self.covariance_,
+            self.complete_data_matrix_,
+            self.n_iter_,
+        ) = missing_graphical_lasso(
             X,
             alpha=self.alpha,
             tol=self.tol,
diff --git a/regain/covariance/time_graphical_lasso_.py b/regain/covariance/time_graphical_lasso_.py
index 27bdc45..f5c3356 100644
--- a/regain/covariance/time_graphical_lasso_.py
+++ b/regain/covariance/time_graphical_lasso_.py
@@ -38,7 +38,6 @@
 
 import numpy as np
 from scipy import linalg
-from six.moves import map, range, zip
 from sklearn.covariance import empirical_covariance, log_likelihood
 from sklearn.utils.extmath import squared_norm
 from sklearn.utils.validation import check_X_y
@@ -48,7 +47,7 @@
 from regain.norm import l1_od_norm
 from regain.prox import prox_logdet, soft_thresholding
 from regain.update_rules import update_rho
-from regain.utils import convergence, error_norm_time
+from regain.utils import Convergence, error_norm_time
 from regain.validation import check_norm_prox
 
 
@@ -57,7 +56,7 @@ def batch_log_likelihood(emp_cov, precision):
     return batch_logdet(precision) - np.sum(emp_cov * precision, axis=(-1, -2))
 
 
-def loss(emp_cov, precision, n_samples=None):
+def loss(emp_cov, precision, n_samples: np.ndarray = None) -> float:
     """Loss function for time-varying graphical lasso."""
     if n_samples is None:
         # 1 sample for each batch, i.e., do not scale.
@@ -67,9 +66,9 @@ def loss(emp_cov, precision, n_samples=None):
     return np.sum(-n_samples * batch_log_likelihood(emp_cov, precision))
 
 
-def objective(n_samples, emp_cov, K, Z_0, Z_1, Z_2, alpha, beta, psi):
+def objective(n_samples, emp_cov, K, Z_0, Z_1, Z_2, alpha, beta, psi) -> float:
     """Objective function for time-varying graphical lasso."""
-    obj = loss(emp_cov, K, n_samples=n_samples)
+    obj: float = loss(emp_cov, K, n_samples=n_samples)
 
     if isinstance(alpha, np.ndarray):
         obj += np.sum(l1_od_norm(alpha * Z_0))
@@ -177,7 +176,7 @@ def time_graphical_lasso(
         n_samples = np.ones(emp_cov.shape[0])
 
     checks = [
-        convergence(
+        Convergence(
            obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi)
         )
     ]
@@ -249,7 +248,7 @@ def time_graphical_lasso(
             #     Z_0 = Z_0_old
             #     break
 
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -270,10 +269,7 @@ def time_graphical_lasso(
         Z_2_old = Z_2.copy()
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if stop_at is not None:
diff --git a/regain/forward_backward/time_graphical_lasso_.py b/regain/forward_backward/time_graphical_lasso_.py
index aed7e01..e79df21 100644
--- a/regain/forward_backward/time_graphical_lasso_.py
+++ b/regain/forward_backward/time_graphical_lasso_.py
@@ -43,8 +43,8 @@
 )
 from regain.norm import l1_od_norm, vector_p_norm
 from regain.prox import prox_FL, soft_thresholding_off_diagonal
-from regain.utils import convergence
 from .forward_backward import _scalar_product, choose_gamma, choose_lamda, upper_diag_3d
+from ..utils import Convergence
 
 
 def loss(S, K, n_samples=None, vareps=0):
@@ -269,7 +269,7 @@ def tgl_forward_backward(
     max_residual = -np.inf
     n_linesearch = 0
 
-    checks = [convergence(obj=obj_partial(precision=K))]
+    checks = [Convergence(obj=obj_partial(precision=K))]
     for iteration_ in range(max_iter):
        k_previous = K.copy()
        x_inv = np.array([linalg.pinvh(x) for x in K])
@@ -325,7 +325,7 @@ def tgl_forward_backward(
             K = K + min(max(lamda, 0), 1) * (y - K)
             # K, t = fista_step(Y, Y - Y_old, t)
 
-        check = convergence(
+        check = Convergence(
             obj=obj_partial(precision=K),
             rnorm=np.linalg.norm(upper_diag_3d(K) - upper_diag_3d(k_previous)),
             snorm=np.linalg.norm(
@@ -341,10 +341,7 @@ def tgl_forward_backward(
         )
 
         if verbose and iteration_ % (50 if verbose < 2 else 1) == 0:
-            print(
-                "obj: %.4f, rnorm: %.7f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         if return_history:
             checks.append(check)
diff --git a/regain/generalized_linear_model/base.py b/regain/generalized_linear_model/base.py
index c30637e..6236d50 100644
--- a/regain/generalized_linear_model/base.py
+++ b/regain/generalized_linear_model/base.py
@@ -1,11 +1,19 @@
-import numpy as np
-
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 
+import numpy as np
 from sklearn.base import BaseEstimator
-from regain.utils import namedtuple_with_defaults
 
-convergence = namedtuple_with_defaults("convergence", "iter obj iter_norm iter_r_norm")
+
+@dataclass
+class Convergence:
+    iter: float = 0
+    obj: float = 0
+    iter_norm: float = 0
+    iter_r_norm: float = 0
+
+    def __str__(self):
+        return f"Iter: {self.iter}, objective: {self.obj:.4f}, iter_norm {self.iter_norm:.4f}, iter_norm_normalized: {self.iter_r_norm:.4f}"
 
 
 def build_adjacency_matrix(neighbours, how="union"):
diff --git a/regain/generalized_linear_model/glm_gaussian.py b/regain/generalized_linear_model/glm_gaussian.py
index aee805d..e476243 100644
--- a/regain/generalized_linear_model/glm_gaussian.py
+++ b/regain/generalized_linear_model/glm_gaussian.py
@@ -32,9 +32,9 @@
 from sklearn.utils import check_array
 
 from regain.generalized_linear_model.base import (
+    Convergence,
     GLM_GM,
     build_adjacency_matrix,
-    convergence,
 )
 from regain.prox import soft_thresholding
 
@@ -74,7 +74,7 @@ def gradient(X, theta, r, selector, n):
             theta = soft_thresholding(theta_new, alpha * gamma)
             thetas.append(theta)
 
-            check = convergence(
+            check = Convergence(
                 iter=iter_,
                 obj=objective(X, theta, n, ix, selector, alpha),
                 iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
@@ -85,12 +85,9 @@ def gradient(X, theta, r, selector, n):
             checks.append(check)
             # if adjust_gamma: # TODO multiply or divide
             if verbose:
-                print(
-                    "Iter: %d, objective: %.4f, iter_norm %.4f"
-                    % (check[0], check[1], check[2])
-                )
+                print(check)
 
-            if check[-2] < tol:
+            if check.iter_norm < tol:
                 break
 
     return_list = [thetas[-1]]
diff --git a/regain/generalized_linear_model/glm_ising.py b/regain/generalized_linear_model/glm_ising.py
index c2999fe..efab1cc 100644
--- a/regain/generalized_linear_model/glm_ising.py
+++ b/regain/generalized_linear_model/glm_ising.py
@@ -35,9 +35,9 @@
 from sklearn.utils import check_array
 
 from regain.generalized_linear_model.base import (
+    Convergence,
     GLM_GM,
     build_adjacency_matrix,
-    convergence,
 )
 from regain.norm import l1_od_norm
 from regain.prox import soft_thresholding_off_diagonal
@@ -147,7 +147,7 @@ def _fit(
 
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
-            check = convergence(
+            check = Convergence(
                 iter=iter_,
                 obj=objective(X, theta, alpha),
                 iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
@@ -158,12 +158,9 @@ def _fit(
         checks.append(check)
         # if adjust_gamma: # TODO multiply or divide
         if verbose:
-            print(
-                "Iter: %d, objective: %.4f, iter_norm %.4f"
-                % (check[0], check[1], check[2])
-            )
+            print(check)
 
-        if np.abs(check[2]) < tol:
+        if np.abs(check.iter_norm) < tol:
             break
 
     return_list = [thetas[-1]]
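The diffs that follow repeat this migration in the remaining GLM solvers: positional indexing into the old namedtuple (`check[2]`) becomes attribute access on the `Convergence` dataclass defined in `regain/generalized_linear_model/base.py` above. A minimal sketch of the new behaviour, assuming the patched module is importable (the values are made up):

    from regain.generalized_linear_model.base import Convergence

    check = Convergence(iter=3, obj=-12.5, iter_norm=5e-4, iter_r_norm=1e-5)
    # __str__ replaces the hand-rolled "Iter: %d, objective: %.4f, ..." prints
    print(check)  # Iter: 3, objective: -12.5000, iter_norm 0.0005, iter_norm_normalized: 0.0000
    # named access replaces check[2] in the tolerance test
    if check.iter_norm < 1e-3:
        print("converged")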
diff --git a/regain/generalized_linear_model/glm_poisson.py b/regain/generalized_linear_model/glm_poisson.py
index 7d204e2..2b577de 100644
--- a/regain/generalized_linear_model/glm_poisson.py
+++ b/regain/generalized_linear_model/glm_poisson.py
@@ -33,9 +33,9 @@
 from sklearn.utils import check_array
 
 from regain.generalized_linear_model.base import (
+    Convergence,
     GLM_GM,
     build_adjacency_matrix,
-    convergence,
 )
 from regain.norm import l1_od_norm
 from regain.prox import soft_thresholding
@@ -124,7 +124,7 @@ def gradient(X, theta, r, selector, n, A, T, rho):
                 break
         thetas.append(theta)
         if iter_ > 0:
-            check = convergence(
+            check = Convergence(
                 iter=iter_,
                 obj=objective_single_variable(X, theta, n, ix, selector, alpha),
                 iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
@@ -135,13 +135,9 @@ def gradient(X, theta, r, selector, n, A, T, rho):
             checks.append(check)
             # if adjust_gamma: # TODO multiply or divide
             if verbose:
-                print(
-                    "Iter: %d, objective: %.4f, iter_norm %.4f,"
-                    " iter_norm_normalized: %.4f"
-                    % (check[0], check[1], check[2], check[3])
-                )
+                print(check)
 
-            if np.abs(check[2]) < tol:
+            if np.abs(check.iter_norm) < tol:
                 break
 
     return_list = [thetas[-1]]
diff --git a/regain/generalized_linear_model/glm_time_ising.py b/regain/generalized_linear_model/glm_time_ising.py
index 648a2c2..e9c3e97 100644
--- a/regain/generalized_linear_model/glm_time_ising.py
+++ b/regain/generalized_linear_model/glm_time_ising.py
@@ -31,7 +31,6 @@
 import warnings
 
 import numpy as np
-from six.moves import map, range, zip
 from sklearn.base import BaseEstimator
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.gaussian_process import kernels
@@ -42,7 +41,7 @@
 from regain.generalized_linear_model.glm_ising import _fit, loss
 from regain.norm import l1_od_norm
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 from regain.validation import check_norm_prox
 
 
@@ -157,7 +156,7 @@ def _fit_time_ising_model(
         Z_R_old = np.zeros_like(Z_R)
         Z_M_old[m] = (Z_L_old, Z_R_old)
 
-    checks = [convergence(obj=objective(X, K, Z_M, alpha, kernel, psi))]
+    checks = [Convergence(obj=objective(X, K, Z_M, alpha, kernel, psi))]
 
     for iteration_ in range(max_iter):
         # update K
@@ -232,7 +231,7 @@ def _fit_time_ising_model(
 
         obj = objective(X, K, Z_M, alpha, kernel, psi) if compute_objective else np.nan
 
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -267,10 +266,7 @@ def _fit_time_ising_model(
             Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if stop_at is not None:
diff --git a/regain/generalized_linear_model/glm_time_poisson.py b/regain/generalized_linear_model/glm_time_poisson.py
index aaaa3c3..b7fefea 100644
--- a/regain/generalized_linear_model/glm_time_poisson.py
+++ b/regain/generalized_linear_model/glm_time_poisson.py
@@ -42,7 +42,7 @@
 from regain.generalized_linear_model.glm_poisson import fit_each_variable, loss
 from regain.norm import l1_od_norm
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 from regain.validation import check_norm_prox
 
 
@@ -152,7 +152,7 @@ def _fit_time_poisson_model(
         Z_R_old = np.zeros_like(Z_R)
         Z_M_old[m] = (Z_L_old, Z_R_old)
 
-    checks = [convergence(obj=objective(X, K, Z_M, alpha, kernel, psi))]
+    checks = [Convergence(obj=objective(X, K, Z_M, alpha, kernel, psi))]
     for iteration_ in range(max_iter):
         # update K
         A = np.zeros_like(K)
@@ -227,7 +227,7 @@ def _fit_time_poisson_model(
 
         obj = objective(X, K, Z_M, alpha, kernel, psi) if compute_objective else np.nan
 
-        check = convergence(
+        check = Convergence(
             obj=obj,
             rnorm=rnorm,
             snorm=snorm,
@@ -262,10 +262,7 @@ def _fit_time_poisson_model(
             Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())
 
         if verbose:
-            print(
-                "obj: %.4f, rnorm: %.4f, snorm: %.4f,"
-                "eps_pri: %.4f, eps_dual: %.4f" % check[:5]
-            )
+            print(check)
 
         checks.append(check)
         if stop_at is not None:
diff --git a/regain/prox.py b/regain/prox.py
index 7c5e164..9e1e132 100644
--- a/regain/prox.py
+++ b/regain/prox.py
@@ -37,7 +37,7 @@
 
 from regain.math import create_from_diagonal, fill_diagonal, get_diagonal
 from regain.update_rules import update_rho
-from regain.utils import convergence
+from regain.utils import Convergence
 
 try:
     from prox_tv import tv1_1d, tvp_1d, tvgen, tvp_2d
@@ -209,7 +209,7 @@ def prox_node_penalty(A_12, lamda, rho=1, tol=1e-4, rtol=1e-2, max_iter=500):
         snorm = rho * np.sqrt(
             squared_norm(W - W_old) + squared_norm(V + W - V_old - W_old)
         )
-        check = convergence(
+        check = Convergence(
             obj=np.nan,
             rnorm=rnorm,
             snorm=snorm,
diff --git a/regain/utils.py b/regain/utils.py
index 76dfdb0..20c82b6 100644
--- a/regain/utils.py
+++ b/regain/utils.py
@@ -31,13 +31,13 @@
 """Utils for REGAIN package."""
 from __future__ import division
 
-import collections
 import functools
 import logging
 import os
 import sys
 import warnings
 from contextlib import contextmanager
+from dataclasses import dataclass
 
 import numpy as np
 import six
@@ -48,12 +48,19 @@
 from sklearn.metrics import average_precision_score, matthews_corrcoef
 
 
-def display_topics(H, W, feature_names, documents, n_top_words, n_top_documents, print_docs=True):
+def display_topics(
+    H, W, feature_names, documents, n_top_words, n_top_documents, print_docs=True
+):
     """Display topics of LDA."""
     topics = []
     for topic_idx, topic in enumerate(H):
         topics.append(
-            " ".join([feature_names[i] + " (%.3f)" % topic[i] for i in topic.argsort()[: -n_top_words - 1 : -1]])
+            " ".join(
+                [
+                    feature_names[i] + " (%.3f)" % topic[i]
+                    for i in topic.argsort()[: -n_top_words - 1 : -1]
+                ]
+            )
         )
         print("Topic %d: %s" % (topic_idx, topics[-1]))
 
@@ -67,7 +74,9 @@ def display_topics(H, W, feature_names, documents, n_top_words, n_top_documents,
 def logentropy_normalize(X):
     """Log-entropy normalisation."""
     P = X / X.values.sum(axis=0, keepdims=True)
-    E = 1 + (P * np.log(P)).fillna(0).values.sum(axis=0, keepdims=True) / np.log1p(X.shape[0])
+    E = 1 + (P * np.log(P)).fillna(0).values.sum(axis=0, keepdims=True) / np.log1p(
+        X.shape[0]
+    )
     return E * np.log1p(X)
 
 
@@ -87,18 +96,20 @@ def retain_top_n(arr, n):
     return mask_array
 
 
-def namedtuple_with_defaults(typename, field_names, default_values=()):
-    T = collections.namedtuple(typename, field_names)
-    T.__new__.__defaults__ = (None,) * len(T._fields)
-    if isinstance(default_values, collections.Mapping):
-        prototype = T(**default_values)
-    else:
-        prototype = T(*default_values)
-    T.__new__.__defaults__ = tuple(prototype)
-    return T
+@dataclass
+class Convergence:
+    obj: float = 0
+    rnorm: float = 0
+    snorm: float = 0
+    e_pri: float = 0
+    e_dual: float = 0
+    precision: float = 0
 
-
-convergence = namedtuple_with_defaults("convergence", "obj rnorm snorm e_pri e_dual precision")
+    def __str__(self):
+        return (
+            f"obj: {self.obj:.4f}, rnorm: {self.rnorm:.4f}, snorm: {self.snorm:.4f},"
+            f"eps_pri: {self.e_pri:.4f}, eps_dual: {self.e_dual:.4f}"
+        )
 
 
 @contextmanager
@@ -123,7 +134,11 @@ def _ensure_filename_ending(filename, possible_extensions=".txt"):
     if isinstance(possible_extensions, six.string_types):
         possible_extensions = [possible_extensions]
-    return filename + ("" if any(filename.endswith(end) for end in possible_extensions) else possible_extensions[0])
+    return filename + (
+        ""
+        if any(filename.endswith(end) for end in possible_extensions)
+        else possible_extensions[0]
+    )
 
 
 def init_logger(filename, verbose=True):
@@ -141,11 +156,16 @@ def init_logger(filename, verbose=True):
         _.close()
 
     logging.basicConfig(
-        filename=logfile, level=logging.INFO, filemode="w", format="%(levelname)s (%(asctime)-15s): %(message)s"
+        filename=logfile,
+        level=logging.INFO,
+        filemode="w",
+        format="%(levelname)s (%(asctime)-15s): %(message)s",
    )
     stream_handler = logging.StreamHandler()
     stream_handler.setLevel(logging.INFO if verbose else logging.ERROR)
-    stream_handler.setFormatter(logging.Formatter("%(levelname)s (%(asctime)-15s): %(message)s"))
+    stream_handler.setFormatter(
+        logging.Formatter("%(levelname)s (%(asctime)-15s): %(message)s")
+    )
     root_logger.addHandler(stream_handler)
 
     return logfile
@@ -170,7 +190,9 @@ def write_network(dataframe, filename):
     dataframe.stack().to_csv(filename)
 
 
-def read_network(filename, threshold=1.0, full_network=True, fill_diagonal=True, delimiter="auto"):
+def read_network(
+    filename, threshold=1.0, full_network=True, fill_diagonal=True, delimiter="auto"
+):
     """Read a network from a list of interactions.
 
     Parameters
@@ -229,7 +251,11 @@ def read_network(filename, threshold=1.0, full_network=True, fill_diagonal=True,
 
 def flatten(lst):
     """Flatten a list."""
-    return [y for l in lst for y in flatten(l)] if isinstance(lst, (list, np.ndarray)) else [lst]
+    return (
+        [y for l in lst for y in flatten(l)]
+        if isinstance(lst, (list, np.ndarray))
+        else [lst]
+    )
 
 
 def upper_to_full(a):
@@ -258,7 +284,11 @@ def convert_data_to_2d(data):
     to the class is encoded in y.
     """
     X = np.vstack(data)
-    y = np.array([np.ones(x.shape[0]) * i for i, x in enumerate(data)]).flatten().astype(int)
+    y = (
+        np.array([np.ones(x.shape[0]) * i for i, x in enumerate(data)])
+        .flatten()
+        .astype(int)
+    )
     return X, y
 
 
@@ -285,7 +315,14 @@ def normalize_matrix(x):
 
 
 def error_norm(
-    cov, comp_cov, norm="frobenius", scaling=True, squared=True, upper_triangular=False, nonzero=False, n=False
+    cov,
+    comp_cov,
+    norm="frobenius",
+    scaling=True,
+    squared=True,
+    upper_triangular=False,
+    nonzero=False,
+    n=False,
 ):
     """Mean Squared Error between two covariance estimators.
 
@@ -334,7 +371,7 @@ def error_norm(
     error = comp_cov - cov
     # compute the error norm
     if norm == "frobenius":
-        squared_norm = np.sum(error ** 2)
+        squared_norm = np.sum(error**2)
     elif norm == "spectral":
         squared_norm = np.amax(np.linalg.svdvals(np.dot(error.T, error)))
     else:
@@ -344,7 +381,10 @@ def error_norm(
         scaling_factor = (
             error.shape[0]
             if len(error.shape) < 3
-            else (np.prod(error.shape[:2]) * ((error.shape[1] - 1) / 2.0 if upper_triangular else error.shape[1]))
+            else (
+                np.prod(error.shape[:2])
+                * ((error.shape[1] - 1) / 2.0 if upper_triangular else error.shape[1])
+            )
         )
         squared_norm = squared_norm / scaling_factor
     # finally get either the squared norm or the norm
@@ -384,7 +424,12 @@ def error_norm_time(cov, comp_cov, norm="frobenius", scaling=True, squared=True):
         `self` and `comp_cov` covariance estimators.
 
     """
-    return np.mean([error_norm(x, y, norm=norm, scaling=scaling, squared=squared) for x, y in zip(cov, comp_cov)])
+    return np.mean(
+        [
+            error_norm(x, y, norm=norm, scaling=scaling, squared=squared)
+            for x, y in zip(cov, comp_cov)
+        ]
+    )
 
 
 def structure_error(true, pred, thresholding=False, eps=1e-2, no_diagonal=False):
@@ -457,12 +502,16 @@ def structure_error(true, pred, thresholding=False, eps=1e-2, no_diagonal=False):
     balanced_accuracy = 0.5 * (recall + specificity)
     false_discovery_rate = FP / (TP + FP) if TP + FP > 0 else 1 - precision
     false_omission_rate = FN / (FN + TN) if FN + TN > 0 else 0
-    negative_predicted_value = TN / (FN + TN) if FN + TN > 0 else 1 - false_omission_rate
+    negative_predicted_value = (
+        TN / (FN + TN) if FN + TN > 0 else 1 - false_omission_rate
+    )
     positive_likelihood_ratio = recall / fall_out if fall_out > 0 else 0
     negative_likelihood_ratio = miss_rate / specificity if specificity > 0 else 0
     diagnostic_odds_ratio = (
-        positive_likelihood_ratio / negative_likelihood_ratio if negative_likelihood_ratio > 0 else 0
+        positive_likelihood_ratio / negative_likelihood_ratio
+        if negative_likelihood_ratio > 0
+        else 0
     )
 
     dictionary = dict(
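PATCH 3's central change is the `Convergence` dataclass in `regain/utils.py`, which replaces the `convergence` namedtuple factory and carries a `__str__` reproducing the old verbose output, so every solver can simply `print(check)`. A minimal sketch, assuming the patched `regain.utils` is importable (the numbers are illustrative):

    from regain.utils import Convergence

    check = Convergence(obj=1.2345, rnorm=0.01, snorm=0.02, e_pri=0.1, e_dual=0.2)
    # the f-string in __str__ keeps the old format, including the missing
    # space after the snorm comma inherited from the original print strings
    print(check)  # obj: 1.2345, rnorm: 0.0100, snorm: 0.0200,eps_pri: 0.1000, eps_dual: 0.2000
    # named fields replace positional slicing such as check[:5]
    assert check.rnorm <= check.e_pri and check.snorm <= check.e_dual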
""" - return np.mean([error_norm(x, y, norm=norm, scaling=scaling, squared=squared) for x, y in zip(cov, comp_cov)]) + return np.mean( + [ + error_norm(x, y, norm=norm, scaling=scaling, squared=squared) + for x, y in zip(cov, comp_cov) + ] + ) def structure_error(true, pred, thresholding=False, eps=1e-2, no_diagonal=False): @@ -457,12 +502,16 @@ def structure_error(true, pred, thresholding=False, eps=1e-2, no_diagonal=False) balanced_accuracy = 0.5 * (recall + specificity) false_discovery_rate = FP / (TP + FP) if TP + FP > 0 else 1 - precision false_omission_rate = FN / (FN + TN) if FN + TN > 0 else 0 - negative_predicted_value = TN / (FN + TN) if FN + TN > 0 else 1 - false_omission_rate + negative_predicted_value = ( + TN / (FN + TN) if FN + TN > 0 else 1 - false_omission_rate + ) positive_likelihood_ratio = recall / fall_out if fall_out > 0 else 0 negative_likelihood_ratio = miss_rate / specificity if specificity > 0 else 0 diagnostic_odds_ratio = ( - positive_likelihood_ratio / negative_likelihood_ratio if negative_likelihood_ratio > 0 else 0 + positive_likelihood_ratio / negative_likelihood_ratio + if negative_likelihood_ratio > 0 + else 0 ) dictionary = dict( From 28f39ba8a6a42e26f57c09ac1966f3e6f255c261 Mon Sep 17 00:00:00 2001 From: Federico Tomasi Date: Tue, 4 Jul 2023 11:22:54 +0200 Subject: [PATCH 4/4] =?UTF-8?q?Bump=20version:=200.3.8=20=E2=86=92=200.3.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- regain/__init__.py | 2 +- setup.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regain/__init__.py b/regain/__init__.py index eda4d7d..a4fa9ba 100644 --- a/regain/__init__.py +++ b/regain/__init__.py @@ -28,4 +28,4 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -__version__ = "0.3.8" +__version__ = "0.3.9" diff --git a/setup.cfg b/setup.cfg index 075c766..1dc1f16 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.8 +current_version = 0.3.9 commit = True tag = True