Skip to content

Commit

Permalink
Resolve format issue (#352)
Browse files Browse the repository at this point in the history
# Pull Request

## What problem does this PR solve?

Issue Number: Fixed #

## Possible side effects?

- Performance:

- Backward compatibility:
  • Loading branch information
anakinxc authored Sep 18, 2023
1 parent 7b62f3a commit 977e932
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 133 deletions.
90 changes: 57 additions & 33 deletions sml/glm/glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
from utils.link import *
import warnings
import os

DEBUG = 0


# Define the _GeneralizedLinearRegressor class using JAX
class _GeneralizedLinearRegressor:
def __init__(self,
fit_intercept=True, # Whether to fit the intercept term, default is True
alpha=0, # L2 regularization strength, default is 0 (no regularization)
solver="newton-cholesky", # Optimization algorithm, default is Newton-Cholesky
max_iter=20, # Maximum number of iterations, default is 20
warm_start=False, # Whether to use warm start, default is False
n_threads=None, # Deprecated parameter (no longer used)
tol=None, # Deprecated parameter (no longer used)
verbose=0 # Level of verbosity, default is 0 (no output)
):
def __init__(
self,
fit_intercept=True, # Whether to fit the intercept term, default is True
alpha=0, # L2 regularization strength, default is 0 (no regularization)
solver="newton-cholesky", # Optimization algorithm, default is Newton-Cholesky
max_iter=20, # Maximum number of iterations, default is 20
warm_start=False, # Whether to use warm start, default is False
n_threads=None, # Deprecated parameter (no longer used)
tol=None, # Deprecated parameter (no longer used)
verbose=0, # Level of verbosity, default is 0 (no output)
):
"""
Initialize the generalized linear regression model.
Expand Down Expand Up @@ -51,11 +54,19 @@ def __init__(self,
self.warm_start = warm_start
self.verbose = verbose
if n_threads:
warnings.warn("SPU does not need n_threads.", category=DeprecationWarning, stacklevel=2)
warnings.warn(
"SPU does not need n_threads.",
category=DeprecationWarning,
stacklevel=2,
)
if warm_start:
warnings.warn("Using minibatch in the second optimizer may cause problems.")
if tol:
warnings.warn("SPU does not support early stop.", category=DeprecationWarning, stacklevel=2)
warnings.warn(
"SPU does not support early stop.",
category=DeprecationWarning,
stacklevel=2,
)

def fit(self, X, y, sample_weight=None):
if sample_weight is None:
Expand All @@ -70,7 +81,10 @@ def fit(self, X, y, sample_weight=None):
if not self.warm_start or not hasattr(self, "coef_"):
self.coef_ = None
if self.solver == "lbfgs":
warnings.warn("LBFGS algorithm cannot be accurately implemented on SPU platform, only approximate implementation is available.", UserWarning)
warnings.warn(
"LBFGS algorithm cannot be accurately implemented on SPU platform, only approximate implementation is available.",
UserWarning,
)
self._fit_lbfgs(X, y)
elif self.solver == "newton-cholesky":
self._fit_newton_cholesky(X, y)
Expand All @@ -85,22 +99,26 @@ def _get_link(self):

def _fit_newton_cholesky(self, X, y):
# Fit the model with the Newton-Cholesky solver and store the result in self.coef_.
# The solver receives this model's loss, link, L2 strength, iteration cap and verbosity;
# self.coef_ is passed as `coef` (presumably the starting point -- confirm in the solver).
# NOTE(review): this is a rendered diff, so the constructor call appears twice below
# (pre- and post-formatting); both calls pass the same keyword arguments.
solver = NewtonCholeskySolver(loss_model=self.loss_model,
l2_reg_strength=self.l2_reg_strength,
max_iter=self.max_iter,
verbose=self.verbose,
link=self.link_model,
coef=self.coef_)
solver = NewtonCholeskySolver(
loss_model=self.loss_model,
l2_reg_strength=self.l2_reg_strength,
max_iter=self.max_iter,
verbose=self.verbose,
link=self.link_model,
coef=self.coef_,
)
# solve() returns the fitted coefficients, which replace self.coef_.
self.coef_ = solver.solve(X, y)

def _fit_lbfgs(self, X, y):
# Fit the model with the LBFGS solver (LBFGSSolver) and store the result in self.coef_.
# The solver receives this model's loss, link, L2 strength, iteration cap and verbosity;
# self.coef_ is passed as `coef` (presumably the starting point -- confirm in the solver).
# NOTE(review): this is a rendered diff, so the constructor call appears twice below
# (pre- and post-formatting); both calls pass the same keyword arguments.
solver = LBFGSSolver(loss_model=self.loss_model,
max_iter=self.max_iter,
l2_reg_strength=self.l2_reg_strength,
verbose=self.verbose,
link=self.link_model,
coef=self.coef_)
solver = LBFGSSolver(
loss_model=self.loss_model,
max_iter=self.max_iter,
l2_reg_strength=self.l2_reg_strength,
verbose=self.verbose,
link=self.link_model,
coef=self.coef_,
)
# solve() returns the fitted coefficients, which replace self.coef_.
self.coef_ = solver.solve(X, y)

def predict(self, X):
Expand All @@ -117,20 +135,22 @@ def score(self, X, y, sample_weight=None):

# Calculate the model's predictions
prediction = self.predict(X)
squared_error = lambda y_true, prediction: jnp.mean(
(y_true - prediction)**2)
squared_error = lambda y_true, prediction: jnp.mean((y_true - prediction) ** 2)
# Calculate the model's deviance
deviance = squared_error(y_true=y, prediction=prediction)
# Calculate the null deviance
deviance_null = squared_error(y_true=y,
prediction=jnp.tile(
jnp.average(y), y.shape[0]))
deviance_null = squared_error(
y_true=y, prediction=jnp.tile(jnp.average(y), y.shape[0])
)
# Calculate D^2
d2 = 1 - (deviance) / (deviance_null)
return d2

def _check_solver_support(self):
supported_solvers = ["lbfgs", "newton-cholesky"] # List of supported optimization algorithms
supported_solvers = [
"lbfgs",
"newton-cholesky",
] # List of supported optimization algorithms
if self.solver not in supported_solvers:
raise ValueError(
f"Invalid solver={self.solver}. Supported solvers are {supported_solvers}."
Expand All @@ -143,6 +163,7 @@ class PoissonRegressor(_GeneralizedLinearRegressor):
This regressor uses the 'log' link function.
"""

def _get_loss(self):
return HalfPoissonLoss()

Expand All @@ -158,6 +179,7 @@ def _get_loss(self):
def _get_link(self):
return LogLink()


# The TweedieRegressor class represents a generalized linear model with Tweedie distribution using JAX.
class TweedieRegressor(_GeneralizedLinearRegressor):
def __init__(
Expand All @@ -166,11 +188,13 @@ def __init__(
):
super().__init__()
# Ensure that the power is within the valid range for the Tweedie distribution
assert(power>=0 and power<=3)
assert power >= 0 and power <= 3
self.power = power

def _get_loss(self):
return HalfTweedieLoss(self.power, )
return HalfTweedieLoss(
self.power,
)

def _get_link(self):
if self.power > 0:
Expand Down
15 changes: 11 additions & 4 deletions sml/glm/glm_emul.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@
sys.path.append('../../')
import sml.utils.emulation as emulation
import spu.utils.distributed as ppd
from glm import _GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor, TweedieRegressor
from glm import (
_GeneralizedLinearRegressor,
PoissonRegressor,
GammaRegressor,
TweedieRegressor,
)

n_samples, n_features = 100, 5


def generate_data(noise=False):
"""
Generate random data for testing.
Expand Down Expand Up @@ -39,8 +46,10 @@ def generate_data(noise=False):
sample_weight = np.random.rand(n_samples)
return X, y, coef, sample_weight


X, y, coef, sample_weight = generate_data()


def emul_SGDClassifier(mode: emulation.Mode.MULTIPROCESS, num=10):
"""
Execute the encrypted SGD classifier in a simulation environment and output the results.
Expand Down Expand Up @@ -85,9 +94,7 @@ def proc_ncSolver(X, y):
# Specify the file paths for cluster and dataset
CLUSTER_ABY3_3PC = os.path.join('../../', emulation.CLUSTER_ABY3_3PC)
# Create the emulator with specified mode and bandwidth/latency settings
emulator = emulation.Emulator(
CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
)
emulator = emulation.Emulator(CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20)
emulator.up()

# Run the proc_ncSolver function using both plaintext and encrypted data
Expand Down
109 changes: 65 additions & 44 deletions sml/glm/glm_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,25 @@
import jax.numpy as jnp
import spu.spu_pb2 as spu_pb2
import spu.utils.simulation as spsim
from glm import _GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor, TweedieRegressor
from glm import (
_GeneralizedLinearRegressor,
PoissonRegressor,
GammaRegressor,
TweedieRegressor,
)
import numpy as np
import scipy.stats as stats
from sklearn.linear_model._glm import _GeneralizedLinearRegressor as std__GeneralizedLinearRegressor
from sklearn.linear_model._glm import PoissonRegressor as std_PoissonRegressor
from sklearn.linear_model._glm import (
_GeneralizedLinearRegressor as std__GeneralizedLinearRegressor,
)
from sklearn.linear_model._glm import PoissonRegressor as std_PoissonRegressor
from sklearn.linear_model._glm import GammaRegressor as std_GammaRegressor
from sklearn.linear_model._glm import TweedieRegressor as std_TweedieRegressor

verbose = 0
n_samples, n_features = 100, 5


def generate_data(noise=False):
"""
Generate random data for testing.
Expand Down Expand Up @@ -41,49 +51,54 @@ def generate_data(noise=False):
sample_weight = np.random.rand(n_samples)
return X, y, coef, sample_weight


X, y, coef, sample_weight = generate_data()
exp_y = jnp.exp(y)
round_exp_y = jnp.round(exp_y)
sim = spsim.Simulator.simple(3, spu_pb2.ProtocolKind.ABY3, spu_pb2.FieldType.FM128)

def accuracy_test(model,std_model, y, coef, num=5):
"""
Fit `model` and the reference `std_model` on the same data and check that their predictions agree.
Both models are fitted on the module-level `X` and `sample_weight` together with the given `y`.
The check passes when the norm (jnp.linalg.norm) of the difference between the first `num`
predictions of the two models is below 1e-2.
Parameters:
----------
model : object
Generalized linear regression model under test.
std_model : object
Reference model to compare against (the callers visible in this file pass scikit-learn estimators).
y : array-like, shape (n_samples,)
Target data used to fit both models.
coef : array-like, shape (n_features + 1,)
True coefficients, including the intercept term and feature weights; only printed in verbose mode.
num : int, optional (default=5)
Number of leading predictions that are compared, and number of entries printed in verbose mode.
Returns:
-------
None
Raises:
------
AssertionError
If the norm of the prediction difference is not below 1e-2.
"""
model.fit(X, y, sample_weight)
std_model.fit(X,y,sample_weight)
# Only the first `num` predictions of each model enter the comparison.
norm_diff = jnp.linalg.norm(model.predict(X)[:num]-jnp.array(std_model.predict(X)[:num]))
# `verbose` is a module-level flag; when set, dump a side-by-side summary of both fits.
if verbose:
print('True Coefficients:', coef[:num])
print("Fitted Coefficients:", model.coef_[:num])
print("std Fitted Coefficients:", std_model.coef_[:num])
print("D^2 Score:", model.score(X[:num], y[:num]))
print("X:", X[:num])
print("Samples:", y[:num])
print("Predictions:", model.predict(X[:num]))
print("std Predictions:", std_model.predict(X[:num]))
print("norm of predict between ours and std: %f" %norm_diff)
print("_________________________________")
print()
assert norm_diff < 1e-2

def accuracy_test(model, std_model, y, coef, num=5):
"""
Fit `model` and the reference `std_model` on the same data and check that their predictions agree.
Both models are fitted on the module-level `X` and `sample_weight` together with the given `y`.
The check passes when the norm (jnp.linalg.norm) of the difference between the first `num`
predictions of the two models is below 1e-2.
Parameters:
----------
model : object
Generalized linear regression model under test.
std_model : object
Reference model to compare against (the callers visible in this file pass scikit-learn estimators).
y : array-like, shape (n_samples,)
Target data used to fit both models.
coef : array-like, shape (n_features + 1,)
True coefficients, including the intercept term and feature weights; only printed in verbose mode.
num : int, optional (default=5)
Number of leading predictions that are compared, and number of entries printed in verbose mode.
Returns:
-------
None
Raises:
------
AssertionError
If the norm of the prediction difference is not below 1e-2.
"""
model.fit(X, y, sample_weight)
std_model.fit(X, y, sample_weight)
# Only the first `num` predictions of each model enter the comparison.
norm_diff = jnp.linalg.norm(
model.predict(X)[:num] - jnp.array(std_model.predict(X)[:num])
)
# `verbose` is a module-level flag; when set, dump a side-by-side summary of both fits.
if verbose:
print('True Coefficients:', coef[:num])
print("Fitted Coefficients:", model.coef_[:num])
print("std Fitted Coefficients:", std_model.coef_[:num])
print("D^2 Score:", model.score(X[:num], y[:num]))
print("X:", X[:num])
print("Samples:", y[:num])
print("Predictions:", model.predict(X[:num]))
print("std Predictions:", std_model.predict(X[:num]))
print("norm of predict between ours and std: %f" % norm_diff)
print("_________________________________")
print()
assert norm_diff < 1e-2


def proc_test(proc):
"""
Expand Down Expand Up @@ -111,6 +126,7 @@ def proc_test(proc):
# Assert that the difference is within the tolerance
assert norm_diff < 1e-4


def proc_ncSolver():
"""
Fit Generalized Linear Regression model using Newton-Cholesky algorithm and return the model coefficients.
Expand All @@ -125,6 +141,7 @@ def proc_ncSolver():
model.fit(X, y)
return model.coef_


def proc_lbfgsSolver():
"""
Fit Generalized Linear Regression model using the L-BFGS algorithm and return the model coefficients.
Expand All @@ -139,6 +156,7 @@ def proc_lbfgsSolver():
model.fit(X, y)
return model.coef_


def proc_Poisson():
"""
Fit Generalized Linear Regression model using PoissonRegressor and return the model coefficients.
Expand All @@ -153,6 +171,7 @@ def proc_Poisson():
model.fit(X, round_exp_y)
return model.coef_


def proc_Gamma():
"""
Fit Generalized Linear Regression model using GammaRegressor and return the model coefficients.
Expand All @@ -167,6 +186,7 @@ def proc_Gamma():
model.fit(X, exp_y)
return model.coef_


def proc_Tweedie():
"""
Fit Generalized Linear Regression model using TweedieRegressor and return the model coefficients.
Expand Down Expand Up @@ -204,10 +224,10 @@ def test_gamma_accuracy(self):
accuracy_test(model, std_model, exp_y, coef)
print('test_gamma_accuracy: OK')

def test_Tweedie_accuracy(self,power=0):
def test_Tweedie_accuracy(self, power=0):
# Test the accuracy of the TweedieRegressor model
model = TweedieRegressor(power=power)
std_model = std_TweedieRegressor(alpha=0,power=power)
std_model = std_TweedieRegressor(alpha=0, power=power)
accuracy_test(model, std_model, exp_y, coef)
print('test_Tweedie_accuracy: OK')

Expand All @@ -231,11 +251,12 @@ def test_gamma_encrypted(self):
proc_test(proc_Gamma)
print('test_gamma_encrypted: OK')

def test_Tweedie_encrypted(self,power=0):
def test_Tweedie_encrypted(self, power=0):
# Test if the results of the TweedieRegressor model are correct after encryption
proc_test(proc_Tweedie)
print('test_Tweedie_encrypted: OK')


if __name__ == '__main__':
# Run the unit tests
unittest.main()
Loading

0 comments on commit 977e932

Please sign in to comment.