Resolve format issue (#352)

# Pull Request

## What problem does this PR solve?

Issue Number: Fixed #

## Possible side effects?

- Performance:
- Backward compatibility:
secretflow · Sep 18, 2023 · 977e932 · 977e932
1 parent 7b62f3a
commit 977e932
Show file tree

Hide file tree

Showing 7 changed files with 225 additions and 133 deletions.
diff --git a/sml/glm/glm.py b/sml/glm/glm.py
@@ -6,20 +6,23 @@
 from utils.link import *
 import warnings
 import os
+
 DEBUG = 0
 
+
 # Define the _GeneralizedLinearRegressor class using JAX
 class _GeneralizedLinearRegressor:
-    def __init__(self,
-                 fit_intercept=True,  # Whether to fit the intercept term, default is True
-                 alpha=0,  # L2 regularization strength, default is 0 (no regularization)
-                 solver="newton-cholesky",  # Optimization algorithm, default is Newton-Cholesky
-                 max_iter=20,  # Maximum number of iterations, default is 20
-                 warm_start=False,  # Whether to use warm start, default is False
-                 n_threads=None,  # Deprecated parameter (no longer used)
-                 tol=None,  # Deprecated parameter (no longer used)
-                 verbose=0  # Level of verbosity, default is 0 (no output)
-                 ):
+    def __init__(
+        self,
+        fit_intercept=True,  # Whether to fit the intercept term, default is True
+        alpha=0,  # L2 regularization strength, default is 0 (no regularization)
+        solver="newton-cholesky",  # Optimization algorithm, default is Newton-Cholesky
+        max_iter=20,  # Maximum number of iterations, default is 20
+        warm_start=False,  # Whether to use warm start, default is False
+        n_threads=None,  # Deprecated parameter (no longer used)
+        tol=None,  # Deprecated parameter (no longer used)
+        verbose=0,  # Level of verbosity, default is 0 (no output)
+    ):
         """
         Initialize the generalized linear regression model.
 
@@ -51,11 +54,19 @@ def __init__(self,
         self.warm_start = warm_start
         self.verbose = verbose
         if n_threads:
-            warnings.warn("SPU does not need n_threads.", category=DeprecationWarning, stacklevel=2)
+            warnings.warn(
+                "SPU does not need n_threads.",
+                category=DeprecationWarning,
+                stacklevel=2,
+            )
         if warm_start:
             warnings.warn("Using minibatch in the second optimizer may cause problems.")
         if tol:
-            warnings.warn("SPU does not support early stop.", category=DeprecationWarning, stacklevel=2)
+            warnings.warn(
+                "SPU does not support early stop.",
+                category=DeprecationWarning,
+                stacklevel=2,
+            )
 
     def fit(self, X, y, sample_weight=None):
         if sample_weight is None:
@@ -70,7 +81,10 @@ def fit(self, X, y, sample_weight=None):
         if not self.warm_start or not hasattr(self, "coef_"):
             self.coef_ = None
         if self.solver == "lbfgs":
-            warnings.warn("LBFGS algorithm cannot be accurately implemented on SPU platform, only approximate implementation is available.", UserWarning)
+            warnings.warn(
+                "LBFGS algorithm cannot be accurately implemented on SPU platform, only approximate implementation is available.",
+                UserWarning,
+            )
             self._fit_lbfgs(X, y)
         elif self.solver == "newton-cholesky":
             self._fit_newton_cholesky(X, y)
@@ -85,22 +99,26 @@ def _get_link(self):
 
     def _fit_newton_cholesky(self, X, y):
         # Use the NewtonCholeskySolver class to implement the Newton-Cholesky optimization algorithm
-        solver = NewtonCholeskySolver(loss_model=self.loss_model,
-                                      l2_reg_strength=self.l2_reg_strength,
-                                      max_iter=self.max_iter,
-                                      verbose=self.verbose,
-                                      link=self.link_model,
-                                      coef=self.coef_)
+        solver = NewtonCholeskySolver(
+            loss_model=self.loss_model,
+            l2_reg_strength=self.l2_reg_strength,
+            max_iter=self.max_iter,
+            verbose=self.verbose,
+            link=self.link_model,
+            coef=self.coef_,
+        )
         self.coef_ = solver.solve(X, y)
 
     def _fit_lbfgs(self, X, y):
         # Use the LBFGSSolver class to implement the Newton-Cholesky optimization algorithm
-        solver = LBFGSSolver(loss_model=self.loss_model,
-                             max_iter=self.max_iter,
-                             l2_reg_strength=self.l2_reg_strength,
-                             verbose=self.verbose,
-                             link=self.link_model,
-                             coef=self.coef_)
+        solver = LBFGSSolver(
+            loss_model=self.loss_model,
+            max_iter=self.max_iter,
+            l2_reg_strength=self.l2_reg_strength,
+            verbose=self.verbose,
+            link=self.link_model,
+            coef=self.coef_,
+        )
         self.coef_ = solver.solve(X, y)
 
     def predict(self, X):
@@ -117,20 +135,22 @@ def score(self, X, y, sample_weight=None):
 
         # Calculate the model's predictions
         prediction = self.predict(X)
-        squared_error = lambda y_true, prediction: jnp.mean(
-            (y_true - prediction)**2)
+        squared_error = lambda y_true, prediction: jnp.mean((y_true - prediction) ** 2)
         # Calculate the model's deviance
         deviance = squared_error(y_true=y, prediction=prediction)
         # Calculate the null deviance
-        deviance_null = squared_error(y_true=y,
-                                      prediction=jnp.tile(
-                                          jnp.average(y), y.shape[0]))
+        deviance_null = squared_error(
+            y_true=y, prediction=jnp.tile(jnp.average(y), y.shape[0])
+        )
         # Calculate D^2
         d2 = 1 - (deviance) / (deviance_null)
         return d2
 
     def _check_solver_support(self):
-        supported_solvers = ["lbfgs", "newton-cholesky"]  # List of supported optimization algorithms
+        supported_solvers = [
+            "lbfgs",
+            "newton-cholesky",
+        ]  # List of supported optimization algorithms
         if self.solver not in supported_solvers:
             raise ValueError(
                 f"Invalid solver={self.solver}. Supported solvers are {supported_solvers}."
@@ -143,6 +163,7 @@ class PoissonRegressor(_GeneralizedLinearRegressor):
 
     This regressor uses the 'log' link function.
     """
+
     def _get_loss(self):
         return HalfPoissonLoss()
 
@@ -158,6 +179,7 @@ def _get_loss(self):
     def _get_link(self):
         return LogLink()
 
+
 # The TweedieRegressor class represents a generalized linear model with Tweedie distribution using JAX.
 class TweedieRegressor(_GeneralizedLinearRegressor):
     def __init__(
@@ -166,11 +188,13 @@ def __init__(
     ):
         super().__init__()
         # Ensure that the power is within the valid range for the Tweedie distribution
-        assert(power>=0 and power<=3)
+        assert power >= 0 and power <= 3
         self.power = power
 
     def _get_loss(self):
-        return HalfTweedieLoss(self.power, )
+        return HalfTweedieLoss(
+            self.power,
+        )
 
     def _get_link(self):
         if self.power > 0:

diff --git a/sml/glm/glm_emul.py b/sml/glm/glm_emul.py
@@ -7,9 +7,16 @@
 sys.path.append('../../')
 import sml.utils.emulation as emulation
 import spu.utils.distributed as ppd
-from glm import _GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor, TweedieRegressor
+from glm import (
+    _GeneralizedLinearRegressor,
+    PoissonRegressor,
+    GammaRegressor,
+    TweedieRegressor,
+)
 
 n_samples, n_features = 100, 5
+
+
 def generate_data(noise=False):
     """
     Generate random data for testing.
@@ -39,8 +46,10 @@ def generate_data(noise=False):
     sample_weight = np.random.rand(n_samples)
     return X, y, coef, sample_weight
 
+
 X, y, coef, sample_weight = generate_data()
 
+
 def emul_SGDClassifier(mode: emulation.Mode.MULTIPROCESS, num=10):
     """
     Execute the encrypted SGD classifier in a simulation environment and output the results.
@@ -85,9 +94,7 @@ def proc_ncSolver(X, y):
         # Specify the file paths for cluster and dataset
         CLUSTER_ABY3_3PC = os.path.join('../../', emulation.CLUSTER_ABY3_3PC)
         # Create the emulator with specified mode and bandwidth/latency settings
-        emulator = emulation.Emulator(
-            CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
-        )
+        emulator = emulation.Emulator(CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20)
         emulator.up()
 
         # Run the proc_ncSolver function using both plaintext and encrypted data

diff --git a/sml/glm/glm_test.py b/sml/glm/glm_test.py
@@ -3,15 +3,25 @@
 import jax.numpy as jnp
 import spu.spu_pb2 as spu_pb2
 import spu.utils.simulation as spsim
-from glm import _GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor, TweedieRegressor
+from glm import (
+    _GeneralizedLinearRegressor,
+    PoissonRegressor,
+    GammaRegressor,
+    TweedieRegressor,
+)
 import numpy as np
 import scipy.stats as stats
-from sklearn.linear_model._glm import _GeneralizedLinearRegressor as std__GeneralizedLinearRegressor
-from sklearn.linear_model._glm import PoissonRegressor  as std_PoissonRegressor
+from sklearn.linear_model._glm import (
+    _GeneralizedLinearRegressor as std__GeneralizedLinearRegressor,
+)
+from sklearn.linear_model._glm import PoissonRegressor as std_PoissonRegressor
 from sklearn.linear_model._glm import GammaRegressor as std_GammaRegressor
 from sklearn.linear_model._glm import TweedieRegressor as std_TweedieRegressor
+
 verbose = 0
 n_samples, n_features = 100, 5
+
+
 def generate_data(noise=False):
     """
     Generate random data for testing.
@@ -41,49 +51,54 @@ def generate_data(noise=False):
     sample_weight = np.random.rand(n_samples)
     return X, y, coef, sample_weight
 
+
 X, y, coef, sample_weight = generate_data()
 exp_y = jnp.exp(y)
 round_exp_y = jnp.round(exp_y)
 sim = spsim.Simulator.simple(3, spu_pb2.ProtocolKind.ABY3, spu_pb2.FieldType.FM128)
 
-def accuracy_test(model,std_model, y, coef, num=5):
-        """
-        Test the fitting, prediction, and scoring functionality of the generalized linear regression model.
-
-        Parameters:
-        ----------
-        model : object
-            Generalized linear regression model object.
-        X : array-like, shape (n_samples, n_features)
-            Feature data.
-        y : array-like, shape (n_samples,)
-            Target data.
-        coef : array-like, shape (n_features + 1,)
-            True coefficients, including the intercept term and feature weights.
-        num : int, optional (default=5)
-            Number of coefficients to display.
-
-        Returns:
-        -------
-        None
-
-        """
-        model.fit(X, y, sample_weight)
-        std_model.fit(X,y,sample_weight)
-        norm_diff = jnp.linalg.norm(model.predict(X)[:num]-jnp.array(std_model.predict(X)[:num]))
-        if verbose:
-            print('True Coefficients:', coef[:num])
-            print("Fitted Coefficients:", model.coef_[:num])
-            print("std Fitted Coefficients:", std_model.coef_[:num])
-            print("D^2 Score:", model.score(X[:num], y[:num]))
-            print("X:", X[:num])
-            print("Samples:", y[:num])
-            print("Predictions:", model.predict(X[:num]))
-            print("std Predictions:", std_model.predict(X[:num]))
-            print("norm of predict between ours and std: %f" %norm_diff)
-            print("_________________________________")
-            print()
-        assert norm_diff < 1e-2
+
+def accuracy_test(model, std_model, y, coef, num=5):
+    """
+    Test the fitting, prediction, and scoring functionality of the generalized linear regression model.
+
+    Parameters:
+    ----------
+    model : object
+        Generalized linear regression model object.
+    X : array-like, shape (n_samples, n_features)
+        Feature data.
+    y : array-like, shape (n_samples,)
+        Target data.
+    coef : array-like, shape (n_features + 1,)
+        True coefficients, including the intercept term and feature weights.
+    num : int, optional (default=5)
+        Number of coefficients to display.
+
+    Returns:
+    -------
+    None
+
+    """
+    model.fit(X, y, sample_weight)
+    std_model.fit(X, y, sample_weight)
+    norm_diff = jnp.linalg.norm(
+        model.predict(X)[:num] - jnp.array(std_model.predict(X)[:num])
+    )
+    if verbose:
+        print('True Coefficients:', coef[:num])
+        print("Fitted Coefficients:", model.coef_[:num])
+        print("std Fitted Coefficients:", std_model.coef_[:num])
+        print("D^2 Score:", model.score(X[:num], y[:num]))
+        print("X:", X[:num])
+        print("Samples:", y[:num])
+        print("Predictions:", model.predict(X[:num]))
+        print("std Predictions:", std_model.predict(X[:num]))
+        print("norm of predict between ours and std: %f" % norm_diff)
+        print("_________________________________")
+        print()
+    assert norm_diff < 1e-2
+
 
 def proc_test(proc):
     """
@@ -111,6 +126,7 @@ def proc_test(proc):
     # Assert that the difference is within the tolerance
     assert norm_diff < 1e-4
 
+
 def proc_ncSolver():
     """
     Fit Generalized Linear Regression model using Newton-Cholesky algorithm and return the model coefficients.
@@ -125,6 +141,7 @@ def proc_ncSolver():
     model.fit(X, y)
     return model.coef_
 
+
 def proc_lbfgsSolver():
     """
     Fit Generalized Linear Regression model using Newton-Cholesky algorithm and return the model coefficients.
@@ -139,6 +156,7 @@ def proc_lbfgsSolver():
     model.fit(X, y)
     return model.coef_
 
+
 def proc_Poisson():
     """
     Fit Generalized Linear Regression model using PoissonRegressor and return the model coefficients.
@@ -153,6 +171,7 @@ def proc_Poisson():
     model.fit(X, round_exp_y)
     return model.coef_
 
+
 def proc_Gamma():
     """
     Fit Generalized Linear Regression model using GammaRegressor and return the model coefficients.
@@ -167,6 +186,7 @@ def proc_Gamma():
     model.fit(X, exp_y)
     return model.coef_
 
+
 def proc_Tweedie():
     """
     Fit Generalized Linear Regression model using TweedieRegressor and return the model coefficients.
@@ -204,10 +224,10 @@ def test_gamma_accuracy(self):
         accuracy_test(model, std_model, exp_y, coef)
         print('test_gamma_accuracy: OK')
 
-    def test_Tweedie_accuracy(self,power=0):
+    def test_Tweedie_accuracy(self, power=0):
         # Test the accuracy of the TweedieRegressor model
         model = TweedieRegressor(power=power)
-        std_model = std_TweedieRegressor(alpha=0,power=power)
+        std_model = std_TweedieRegressor(alpha=0, power=power)
         accuracy_test(model, std_model, exp_y, coef)
         print('test_Tweedie_accuracy: OK')
 
@@ -231,11 +251,12 @@ def test_gamma_encrypted(self):
         proc_test(proc_Gamma)
         print('test_gamma_encrypted: OK')
 
-    def test_Tweedie_encrypted(self,power=0):
+    def test_Tweedie_encrypted(self, power=0):
         # Test if the results of the TweedieRegressor model are correct after encryption
         proc_test(proc_Tweedie)
         print('test_Tweedie_encrypted: OK')
 
+
 if __name__ == '__main__':
     # Run the unit tests
     unittest.main()