CLN: Remove fit_intercept in optimizers
Fixes dynamicslab#338

Initialize fit_intercept in super() as False

Prevent the _tragic_ 😭 case of fitting both an intercept and a constant
term; guarding against that case any other way would have required higher
coupling and more tests.

Users can instead specify a constant term in the feature libraries via
include_bias, where supported, or by adding a constant function to a custom
library and concatenating.
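
For concreteness, a minimal sketch of both routes using the pysindy feature-library API (the specific library classes here are assumptions based on the existing feature-library modules, not part of this commit):

```python
import pysindy as ps

# Route 1: libraries that expose include_bias can emit the constant term.
poly_lib = ps.PolynomialLibrary(degree=2, include_bias=True)

# Route 2 (sketch): supply a constant function through a custom library and
# concatenate it with another library. Note a unary function is applied to
# each input variable, so this yields one constant column per variable.
constant_lib = ps.CustomLibrary(
    library_functions=[lambda x: x**0],
    function_names=[lambda x: "1"],
)
combined_lib = constant_lib + ps.FourierLibrary()
```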

This change incurs a minor performance hit that disappears with scale, owing
to the way sklearn.LinearRegression fits the intercept separately from the
regression matrices.
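
To make the equivalence concrete, a small self-contained check (plain sklearn and numpy, not part of this commit): appending an explicit bias column with fit_intercept=False recovers the same model that sklearn otherwise obtains by centering the data and fitting the intercept separately.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 3.0

# sklearn's separate intercept fit (centers X and y, then solves):
m1 = LinearRegression(fit_intercept=True).fit(X, y)

# Equivalent model: explicit constant column, intercept disabled:
X_bias = np.column_stack([np.ones(len(X)), X])
m2 = LinearRegression(fit_intercept=False).fit(X_bias, y)

assert np.allclose(m1.intercept_, m2.coef_[0])
assert np.allclose(m1.coef_, m2.coef_[1:])
```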
Jacob-Stevens-Haas committed Aug 7, 2023
1 parent 309da49 commit d8f5785
Showing 13 changed files with 14 additions and 82 deletions.
16 changes: 3 additions & 13 deletions pysindy/optimizers/base.py
@@ -46,10 +46,6 @@ class BaseOptimizer(LinearRegression, ComplexityMixin):
     Parameters
     ----------
-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         Normalize the columns of x (the SINDy library terms) before regression
         by dividing by the L2-norm. Note that the 'normalize' option in sklearn
@@ -97,12 +93,11 @@ def __init__(
         self,
         max_iter=20,
         normalize_columns=False,
-        fit_intercept=False,
         initial_guess=None,
         copy_X=True,
         unbias: bool = True,
     ):
-        super().__init__(fit_intercept=fit_intercept, copy_X=copy_X)
+        super().__init__(fit_intercept=False, copy_X=copy_X)

         if max_iter <= 0:
             raise ValueError("max_iter must be positive")
@@ -157,7 +152,7 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws):
         x, y, X_offset, y_offset, X_scale = _preprocess_data(
             x_,
             y,
-            fit_intercept=self.fit_intercept,
+            fit_intercept=False,
             copy=self.copy_X,
             sample_weight=sample_weight,
         )
@@ -212,14 +207,10 @@ def fit(self, x_, y, sample_weight=None, **reduce_kws):

     def _unbias(self, x, y):
         coef = np.zeros((y.shape[1], x.shape[1]))
-        if hasattr(self, "fit_intercept"):
-            fit_intercept = self.fit_intercept
-        else:
-            fit_intercept = False
         for i in range(self.ind_.shape[0]):
             if np.any(self.ind_[i]):
                 coef[i, self.ind_[i]] = (
-                    LinearRegression(fit_intercept=fit_intercept)
+                    LinearRegression(fit_intercept=False)
                     .fit(x[:, self.ind_[i]], y[:, i])
                     .coef_
                 )
@@ -303,7 +294,6 @@ def __init__(

         super().__init__(
             max_iter=opt.max_iter,
-            fit_intercept=opt.fit_intercept,
             initial_guess=opt.initial_guess,
             copy_X=opt.copy_X,
         )
6 changes: 0 additions & 6 deletions pysindy/optimizers/constrained_sr3.py
@@ -66,10 +66,6 @@ class ConstrainedSR3(SR3):
     max_iter : int, optional (default 30)
         Maximum iterations of the optimization algorithm.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     constraint_lhs : numpy ndarray, optional (default None)
         Shape should be (n_constraints, n_features * n_targets),
         The left hand side matrix C of Cw <= d.
@@ -157,7 +153,6 @@ def __init__(
         constraint_rhs=None,
         constraint_order="target",
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         initial_guess=None,
         thresholds=None,
@@ -178,7 +173,6 @@
             trimming_step_size=trimming_step_size,
             max_iter=max_iter,
             initial_guess=initial_guess,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             verbose=verbose,
6 changes: 0 additions & 6 deletions pysindy/optimizers/frols.py
@@ -22,10 +22,6 @@ class FROLS(BaseOptimizer):
     Parameters
     ----------
-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         Normalize the columns of x (the SINDy library terms) before regression
         by dividing by the L2-norm. Note that the 'normalize' option in sklearn
@@ -84,7 +80,6 @@ class FROLS(BaseOptimizer):
     def __init__(
         self,
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         kappa=None,
         max_iter=10,
@@ -94,7 +89,6 @@ def __init__(
         unbias=True,
     ):
         super().__init__(
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             max_iter=max_iter,
             normalize_columns=normalize_columns,
6 changes: 0 additions & 6 deletions pysindy/optimizers/miosr.py
@@ -53,10 +53,6 @@ class MIOSR(BaseOptimizer):
         optimality (either per dimension or jointly depending on the
         above sparsity settings).

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     constraint_lhs : numpy ndarray, optional (default None)
         Shape should be (n_constraints, n_features * n_targets),
         The left hand side matrix C of Cw <= d.
@@ -115,7 +111,6 @@ def __init__(
         group_sparsity=None,
         alpha=0.01,
         regression_timeout=10,
-        fit_intercept=False,
         constraint_lhs=None,
         constraint_rhs=None,
         constraint_order="target",
@@ -127,7 +122,6 @@
     ):
         super().__init__(
             normalize_columns=normalize_columns,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             unbias=unbias,
         )
6 changes: 0 additions & 6 deletions pysindy/optimizers/sindy_pi.py
@@ -46,10 +46,6 @@ class SINDyPI(SR3):
     max_iter : int, optional (default 10000)
         Maximum iterations of the optimization algorithm.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         This parameter normalizes the columns of Theta before the
         optimization is done. This tends to standardize the columns
@@ -97,7 +93,6 @@ def __init__(
         tol=1e-5,
         thresholder="l1",
         max_iter=10000,
-        fit_intercept=False,
         copy_X=True,
         thresholds=None,
         model_subset=None,
@@ -111,7 +106,6 @@
             tol=tol,
             thresholder=thresholder,
             max_iter=max_iter,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             unbias=unbias,
6 changes: 0 additions & 6 deletions pysindy/optimizers/sr3.py
@@ -74,10 +74,6 @@ class SR3(BaseOptimizer):
         Initial guess for coefficients ``coef_``.
         If None, least-squares is used to obtain an initial guess.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         Normalize the columns of x (the SINDy library terms) before regression
         by dividing by the L2-norm. Note that the 'normalize' option in sklearn
@@ -151,7 +147,6 @@ def __init__(
         trimming_fraction=0.0,
         trimming_step_size=1.0,
         max_iter=30,
-        fit_intercept=False,
         copy_X=True,
         initial_guess=None,
         normalize_columns=False,
@@ -161,7 +156,6 @@
         super(SR3, self).__init__(
             max_iter=max_iter,
             initial_guess=initial_guess,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             unbias=unbias,
6 changes: 0 additions & 6 deletions pysindy/optimizers/ssr.py
@@ -23,10 +23,6 @@ class SSR(BaseOptimizer):
     max_iter : int, optional (default 20)
         Maximum iterations of the optimization algorithm.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         Normalize the columns of x (the SINDy library terms) before regression
         by dividing by the L2-norm. Note that the 'normalize' option in sklearn
@@ -94,7 +90,6 @@ def __init__(
         max_iter=20,
         ridge_kw=None,
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         criteria="coefficient_value",
         kappa=None,
@@ -103,7 +98,6 @@
     ):
         super(SSR, self).__init__(
             max_iter=max_iter,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             unbias=unbias,
6 changes: 0 additions & 6 deletions pysindy/optimizers/stable_linear_sr3.py
@@ -67,10 +67,6 @@ class StableLinearSR3(ConstrainedSR3):
     max_iter : int, optional (default 30)
         Maximum iterations of the optimization algorithm.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     constraint_lhs : numpy ndarray, optional (default None)
         Shape should be (n_constraints, n_features * n_targets),
         The left hand side matrix C of Cw <= d.
@@ -155,7 +151,6 @@ def __init__(
         constraint_rhs=None,
         constraint_order="target",
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         initial_guess=None,
         thresholds=None,
@@ -177,7 +172,6 @@
             trimming_step_size=trimming_step_size,
             max_iter=max_iter,
             initial_guess=initial_guess,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             verbose=verbose,
6 changes: 0 additions & 6 deletions pysindy/optimizers/stlsq.py
@@ -42,10 +42,6 @@ class STLSQ(BaseOptimizer):
     ridge_kw : dict, optional (default None)
         Optional keyword arguments to pass to the ridge regression.

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     normalize_columns : boolean, optional (default False)
         Normalize the columns of x (the SINDy library terms) before regression
         by dividing by the L2-norm. Note that the 'normalize' option in sklearn
@@ -103,15 +99,13 @@ def __init__(
         max_iter=20,
         ridge_kw=None,
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         initial_guess=None,
         verbose=False,
         unbias=True,
     ):
         super().__init__(
             max_iter=max_iter,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             normalize_columns=normalize_columns,
             unbias=unbias,
6 changes: 0 additions & 6 deletions pysindy/optimizers/trapping_sr3.py
@@ -133,10 +133,6 @@ class TrappingSR3(SR3):
         Initial guess for vector A in the optimization. Otherwise
         A is initialized as A = diag(gamma).

-    fit_intercept : boolean, optional (default False)
-        Whether to calculate the intercept for this model. If set to false, no
-        intercept will be used in calculations.
-
     copy_X : boolean, optional (default True)
         If True, X will be copied; else, it may be overwritten.
@@ -239,7 +235,6 @@ def __init__(
         max_iter=30,
         accel=False,
         normalize_columns=False,
-        fit_intercept=False,
         copy_X=True,
         m0=None,
         A0=None,
@@ -255,7 +250,6 @@
             threshold=threshold,
             max_iter=max_iter,
             normalize_columns=normalize_columns,
-            fit_intercept=fit_intercept,
             copy_X=copy_X,
             thresholder=thresholder,
             thresholds=thresholds,
14 changes: 6 additions & 8 deletions pysindy/optimizers/wrapped_optimizer.py
@@ -21,10 +21,11 @@ class WrappedOptimizer(BaseOptimizer):
     ----------
     optimizer: estimator object
         The optimizer/sparse regressor to be wrapped, implementing ``fit`` and
-        ``predict``. ``optimizer`` should also have the attributes ``coef_``,
-        ``fit_intercept``, and ``intercept_``. Note that attribute
-        ``normalize`` is deprecated as of sklearn versions >= 1.0 and will be
-        removed in future versions.
+        ``predict``. ``optimizer`` should also have the attribute ``coef_``.
+        Any optimizer that supports a ``fit_intercept`` argument should
+        be initialized to False. Note that attribute ``normalize`` is
+        deprecated as of sklearn versions >= 1.0 and will be removed in
+        future versions.
     """

@@ -43,10 +44,7 @@ def _reduce(self, x, y):
         ]
         self.coef_ = np.concatenate(coef_list, axis=0)
         self.ind_ = np.abs(self.coef_) > COEF_THRESHOLD
-        if hasattr(self.optimizer, "intercept_"):
-            self.intercept_ = self.optimizer.intercept_
-        else:
-            self.intercept_ = 0.0
+        self.intercept_ = 0.0
         return self

     def predict(self, x):
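Under the updated WrappedOptimizer contract, a wrapped sklearn estimator should have its own intercept disabled; a minimal usage sketch (the import path is an assumption based on this repository's layout):

```python
from sklearn.linear_model import Lasso

from pysindy.optimizers import WrappedOptimizer

# The wrapped estimator handles sparsity; any constant term now comes from
# the feature library, so the estimator's own intercept is turned off.
opt = WrappedOptimizer(Lasso(alpha=0.1, fit_intercept=False))
```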
4 changes: 1 addition & 3 deletions test/test_optimizers.py
@@ -138,9 +138,7 @@ def test_complexity_not_fitted(optimizer, data_derivative_2d):
     assert optimizer.complexity > 0


-@pytest.mark.parametrize(
-    "kwargs", [{"normalize_columns": True}, {"fit_intercept": True}, {"copy_X": False}]
-)
+@pytest.mark.parametrize("kwargs", [{"normalize_columns": True}, {"copy_X": False}])
 def test_alternate_parameters(data_derivative_1d, kwargs):
     x, x_dot = data_derivative_1d
     x = x.reshape(-1, 1)
8 changes: 4 additions & 4 deletions test/test_optimizers_complexity.py
@@ -49,12 +49,12 @@ def test_complexity(n_samples, n_features, n_informative, random_state):
     )
     y = y.reshape(-1, 1)

-    opt_kwargs = dict(fit_intercept=True)
+    opt_kwargs = dict(fit_intercept=False)
     optimizers = [
-        SR3(thresholder="l0", threshold=0.1, **opt_kwargs),
-        SR3(thresholder="l1", threshold=0.1, **opt_kwargs),
+        SR3(thresholder="l0", threshold=0.1),
+        SR3(thresholder="l1", threshold=0.1),
         Lasso(**opt_kwargs),
-        STLSQ(**opt_kwargs),
+        STLSQ(),
         ElasticNet(**opt_kwargs),
         Ridge(**opt_kwargs),
         LinearRegression(**opt_kwargs),
