Commit
Merge pull request #38 from fdtomasi/efficient_batching
add tests for glm
fdtomasi authored Apr 24, 2023
2 parents de427e2 + 5b4f9c8 commit 82b7767
Showing 5 changed files with 152 additions and 36 deletions.
12 changes: 6 additions & 6 deletions regain/covariance/time_graphical_lasso_.py
@@ -59,9 +59,9 @@ def batch_log_likelihood(emp_cov, precision):
 
 def loss(emp_cov, precision, n_samples=None):
     """Loss function for time-varying graphical lasso."""
-    batch_dim = emp_cov.shape[0]
     if n_samples is None:
         # 1 sample for each batch, ie, do not scale.
+        batch_dim = emp_cov.shape[0]
         n_samples = np.ones(batch_dim)
 
     return np.sum(-n_samples * batch_log_likelihood(emp_cov, precision))
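A minimal sketch of how this batched loss is called, assuming emp_cov and precision are stacked (n_times, d, d) arrays as in the rest of this module; the import path comes from the file header above, while the shapes and values are illustrative only:

import numpy as np
from regain.covariance.time_graphical_lasso_ import loss

n_times, d = 4, 3
emp_cov = np.stack([np.eye(d)] * n_times)    # one empirical covariance per time point
precision = np.stack([np.eye(d)] * n_times)  # one precision estimate per time point

# With n_samples=None the function builds np.ones(batch_dim) itself,
# so every time point contributes to the loss unscaled.
unscaled = loss(emp_cov, precision)

# Per-time-point sample counts can still be passed explicitly as weights.
weighted = loss(emp_cov, precision, n_samples=np.array([10, 20, 30, 40]))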
@@ -254,15 +254,15 @@ def time_graphical_lasso(
             rnorm=rnorm,
             snorm=snorm,
             e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol
-            + rtol
-            * max(
+            + rtol
+            * max(
                 np.sqrt(squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                 np.sqrt(squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:])),
             ),
             e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol
-            + rtol
-            * rho
-            * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
+            + rtol
+            * rho
+            * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
             # precision=Z_0.copy()
         )
         Z_0_old = Z_0.copy()
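For reference, e_pri and e_dual above follow the standard ADMM stopping criterion of Boyd et al. (2011, §3.3), combining an absolute tolerance tol and a relative tolerance rtol; the hunk changes indentation only. Schematically:

# p = K.size + 2 * Z_1.size           # number of constrained entries
# e_pri  = sqrt(p) * tol + rtol * max(||consensus vars||, ||working vars||)
# e_dual = sqrt(p) * tol + rtol * ||rho * scaled dual vars||
# converged when rnorm <= e_pri and snorm <= e_dual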
40 changes: 29 additions & 11 deletions regain/generalized_linear_model/glm_poisson.py
@@ -29,13 +29,16 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import numpy as np
-from sklearn.utils import check_array
 from sklearn.base import BaseEstimator
+from sklearn.utils import check_array
 
-from regain.generalized_linear_model.base import GLM_GM, convergence
-from regain.generalized_linear_model.base import build_adjacency_matrix
-from regain.prox import soft_thresholding
+from regain.generalized_linear_model.base import (
+    GLM_GM,
+    build_adjacency_matrix,
+    convergence,
+)
 from regain.norm import l1_od_norm
+from regain.prox import soft_thresholding
 
 
 def loss_single_variable(X, theta, n, r, selector):
@@ -125,14 +128,17 @@ def gradient(X, theta, r, selector, n, A, T, rho):
             iter=iter_,
             obj=objective_single_variable(X, theta, n, ix, selector, alpha),
             iter_norm=np.linalg.norm(thetas[-2] - thetas[-1]),
-            iter_r_norm=(np.linalg.norm(thetas[-2] - thetas[-1]) / np.linalg.norm(thetas[-2])),
+            iter_r_norm=(
+                np.linalg.norm(thetas[-2] - thetas[-1]) / np.linalg.norm(thetas[-2])
+            ),
         )
         checks.append(check)
         # if adjust_gamma: # TODO multiply or divide
         if verbose:
             print(
                 "Iter: %d, objective: %.4f, iter_norm %.4f,"
-                " iter_norm_normalized: %.4f" % (check[0], check[1], check[2], check[3])
+                " iter_norm_normalized: %.4f"
+                % (check[0], check[1], check[2], check[3])
             )
 
         if np.abs(check[2]) < tol:
@@ -228,7 +234,14 @@ def __init__(
         compute_objective=True,
     ):
         super(PoissonGraphicalModel, self).__init__(
-            alpha, tol, rtol, max_iter, verbose, return_history, return_n_iter, compute_objective
+            alpha,
+            tol,
+            rtol,
+            max_iter,
+            verbose,
+            return_history,
+            return_n_iter,
+            compute_objective,
         )
         self.reconstruction = reconstruction
         self.mode = mode
@@ -251,21 +264,26 @@ def fit(self, X, y=None):
             raise ValueError("Not implemented.")
 
         elif self.mode.lower() == "coordinate_descent":
-            print("sono qui")
             thetas_pred = []
             historys = []
             if self.intercept:
                 X = np.hstack((X, np.ones((X.shape[0], 1))))
             for ix in range(X.shape[1]):
                 verbose = max(0, self.verbose - 1)
-                res = fit_each_variable(X, ix, self.alpha, tol=self.tol, verbose=verbose)
+                res = fit_each_variable(
+                    X, ix, self.alpha, tol=self.tol, verbose=verbose
+                )
                 thetas_pred.append(res[0])
                 historys.append(res[1:])
-            self.precision_ = build_adjacency_matrix(thetas_pred, how=self.reconstruction)
+            self.precision_ = build_adjacency_matrix(
+                thetas_pred, how=self.reconstruction
+            )
            self.history = historys
         else:
             raise ValueError(
-                "Unknown optimization mode. Found " + self.mode + ". Options are 'coordiante_descent', "
+                "Unknown optimization mode. Found "
+                + self.mode
+                + ". Options are 'coordiante_descent', "
                 "'symmetric_fbs'"
             )
         return self
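A hypothetical usage sketch of the coordinate-descent path exercised above; the constructor arguments and the precision_ attribute appear in this diff, but the parameter values and the data are illustrative only:

import numpy as np
from regain.generalized_linear_model.glm_poisson import PoissonGraphicalModel

rng = np.random.RandomState(0)
X = rng.poisson(lam=1.0, size=(50, 5))  # count data: 50 samples, 5 variables

# fit() runs one l1-penalised Poisson regression per variable, then merges
# the per-variable coefficients through build_adjacency_matrix.
mdl = PoissonGraphicalModel(alpha=0.1, tol=1e-4, max_iter=100, mode="coordinate_descent")
mdl.fit(X)
print(mdl.precision_)  # estimated neighbourhood structure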
27 changes: 11 additions & 16 deletions regain/generalized_linear_model/glm_time_ising.py
@@ -31,37 +31,36 @@
 import warnings
 
 import numpy as np
 
-from six.moves import map, range, zip
-
 from sklearn.base import BaseEstimator
-from sklearn.utils.extmath import squared_norm
-from sklearn.utils.validation import check_X_y
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.gaussian_process import kernels
-
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.extmath import squared_norm
+from sklearn.utils.validation import check_X_y, check_is_fitted
 
 from regain.covariance.time_graphical_lasso_ import init_precision
 from regain.generalized_linear_model.glm_ising import _fit, loss
 from regain.norm import l1_od_norm
-from regain.utils import convergence
 from regain.update_rules import update_rho
+from regain.utils import convergence
 from regain.validation import check_norm_prox
 
 
-def loss_ising(X, K, n_samples=None):
+def loss_ising(X, precision, n_samples=None):
     """Loss function for time-varying ising model."""
     if n_samples is None:
-        n_samples = np.ones(X.shape[0])
-    return sum(-ni * loss(x, k) for x, k, ni in zip(X, K, n_samples))
+        # 1 sample for each batch, ie, do not scale.
+        batch_dim = X.shape[0]
+        n_samples = np.ones(batch_dim)
+
+    return sum(-ni * loss(x, k) for x, k, ni in zip(X, precision, n_samples))
 
 
 def objective(X, K, Z_M, alpha, kernel, psi):
     """Objective function for time-varying ising model."""
 
     obj = loss_ising(X, K)
-    obj += alpha * sum(map(l1_od_norm, K))
+    obj += alpha * np.sum(l1_od_norm(K))
 
     for m in range(1, K.shape[0]):
         # all possible non markovians jumps
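The switch from sum(map(l1_od_norm, K)) to np.sum(l1_od_norm(K)) suggests that l1_od_norm now handles a stacked (n_times, d, d) array in a single vectorised call, in line with the efficient_batching branch name. A rough equivalence check under that assumption, using an illustrative stand-in rather than regain's actual l1_od_norm:

import numpy as np

def l1_od_norm_batched(K):
    # Illustrative stand-in: l1 norm of the off-diagonal entries, computed
    # per matrix over the leading (time) axis of a (T, d, d) array.
    diag = np.abs(np.diagonal(K, axis1=-2, axis2=-1)).sum(-1)
    return np.abs(K).sum((-2, -1)) - diag

K = np.random.RandomState(0).randn(4, 3, 3)
batched = np.sum(l1_od_norm_batched(K))                   # one vectorised call
looped = sum(l1_od_norm_batched(k[None])[0] for k in K)   # old per-matrix loop
assert np.isclose(batched, looped)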
@@ -172,8 +171,6 @@ def _fit_time_ising_model(
     A /= 2.0
     # K_new = np.zeros_like(K)
 
-    print(K.shape)
-
     for t in range(n_times):
         K[t, :, :] = _fit(
             X=X[t, :, :],
@@ -509,7 +506,7 @@ def fit(self, X, y):
                 self.classes_[:, None]
             )
         else:
-            kernel = self.kernel
+            kernel = self.kernel or np.identity(self.classes_.size)
             if kernel.shape[0] != self.classes_.size:
                 raise ValueError(
                     "Kernel size does not match classes of samples, "
Expand Down Expand Up @@ -579,7 +576,6 @@ def precision_similarity(precision, psi=None):
distances = squareform(
[l1_od_norm(t1 - t2) for t1, t2 in combinations(precision, 2)]
)
print(distances)
distances /= np.max(distances)
return 1 - distances

Expand Down Expand Up @@ -750,7 +746,6 @@ def fit(self, X, y):
kernel = kernels.RBF(0.0001)(labels_pred[:, None]) + kernels.RBF(
self.beta
)(np.arange(n_times)[:, None])
print(kernel)
labels_pred_old = labels_pred

else:
5 changes: 2 additions & 3 deletions regain/generalized_linear_model/glm_time_poisson.py
@@ -31,7 +31,6 @@
 import warnings
 
 import numpy as np
-from six.moves import map, range, zip
 from sklearn.base import BaseEstimator
 from sklearn.cluster import AgglomerativeClustering
 from sklearn.gaussian_process import kernels
@@ -58,7 +57,7 @@ def objective(X, K, Z_M, alpha, kernel, psi):
     """Objective function for time-varying poisson model."""
 
     obj = loss_poisson(X, K)
-    obj += alpha * sum(map(l1_od_norm, K))
+    obj += alpha * np.sum(l1_od_norm(K))
 
     for m in range(1, K.shape[0]):
         # all possible non markovians jumps
@@ -501,7 +500,7 @@ def fit(self, X, y):
                 self.classes_[:, None]
             )
         else:
-            kernel = self.kernel
+            kernel = self.kernel or np.identity(self.classes_.size)
             if kernel.shape[0] != self.classes_.size:
                 raise ValueError(
                     "Kernel size does not match classes of samples, "
104 changes: 104 additions & 0 deletions regain/tests/test_generalized_linear_model.py
@@ -0,0 +1,104 @@
# BSD 3-Clause License

# Copyright (c) 2019, regain authors
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.

# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.

# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Test GLM."""
import warnings

import numpy as np
from numpy.testing import assert_array_equal

from regain.generalized_linear_model.glm_gaussian import Gaussian_GLM_GM
from regain.generalized_linear_model.glm_ising import IsingGraphicalModel
from regain.generalized_linear_model.glm_poisson import PoissonGraphicalModel
from regain.generalized_linear_model.glm_time_ising import TemporalIsingModel
from regain.generalized_linear_model.glm_time_poisson import TemporalPoissonModel
from regain.math import fill_diagonal


def test_glmg_zero():
    """Check that Gaussian_GLM_GM can handle zero data."""
    x = np.zeros((9, 3))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mdl = Gaussian_GLM_GM(max_iter=1).fit(x)

    fill_diagonal(mdl.precision_, 0)
    assert_array_equal(mdl.precision_, np.zeros((3, 3)))


def test_glmi_zero():
    """Check that IsingGraphicalModel can handle zero data."""
    x = np.zeros((9, 3))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mdl = IsingGraphicalModel(max_iter=1).fit(x)

    fill_diagonal(mdl.precision_, 0)
    assert_array_equal(mdl.precision_, np.zeros((3, 3)))


def test_glmp_zero():
    """Check that PoissonGraphicalModel can handle zero data."""
    x = np.zeros((9, 3))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mdl = PoissonGraphicalModel(max_iter=1).fit(x)

    fill_diagonal(mdl.precision_, 0)
    assert_array_equal(mdl.precision_, np.zeros((3, 3)))


def test_glmtp_zero():
    """Check that TemporalPoissonModel can handle zero data."""
    x = np.zeros((9, 3))
    y = [0, 0, 0, 1, 1, 1, 2, 2, 2]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mdl = TemporalPoissonModel(max_iter=1).fit(x, y)

    fill_diagonal(mdl.precision_, 0)
    assert_array_equal(mdl.precision_, np.zeros((3, 3, 3)))


def test_glmti_zero():
    """Check that TemporalIsingModel can handle zero data."""
    x = np.zeros((9, 3))
    x[0][0] = 1
    y = [0, 0, 0, 1, 1, 1, 2, 2, 2]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mdl = TemporalIsingModel(max_iter=1).fit(x, y)

    fill_diagonal(mdl.precision_, 0)
    assert_array_equal(mdl.precision_, np.zeros((3, 3, 3)))
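The new module runs under pytest (pytest regain/tests/test_generalized_linear_model.py): each test fits a model for a single iteration on all-zero (or, for the temporal Ising case, near-zero) data, zeroes the diagonal with fill_diagonal, and asserts that the remaining precision entries are exactly zero.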
