y_required flag

koaning · Nov 20, 2024 · e555a6c
1 parent fe730cf
commit e555a6c
Show file tree

Hide file tree

Showing 16 changed files with 23 additions and 20 deletions.
diff --git a/sklego/dummy.py b/sklego/dummy.py
@@ -72,7 +72,7 @@ def fit(self, X: np.array, y: np.array) -> "RandomRegressor":
         """
         if self.strategy not in self._ALLOWED_STRATEGIES:
             raise ValueError(f"strategy {self.strategy} is not in {self._ALLOWED_STRATEGIES}")
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         self.n_features_in_ = X.shape[1]
 
         self.min_ = np.min(y)

diff --git a/sklego/feature_selection/mrmr.py b/sklego/feature_selection/mrmr.py
@@ -203,7 +203,7 @@ def fit(self, X, y):
 
                 k parameter is not integer type or is < n_features_in (X.shape[1]) or < 1
         """
-        X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True)
+        X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True, y_required=True)
 
         self._y_dtype = y.dtype
 

diff --git a/sklego/linear_model.py b/sklego/linear_model.py
@@ -98,7 +98,7 @@ def fit(self, X, y):
             - If `span` is not between 0 and 1.
             - If `sigma` is negative.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if self.span is not None:
             if not 0 <= self.span <= 1:
                 raise ValueError(f"Param `span` must be 0 <= span <= 1, got: {self.span}")
@@ -225,7 +225,7 @@ def fit(self, X, y):
         self : ProbWeightRegression
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
 
         # Construct the problem.
         betas = cp.Variable(X.shape[1])
@@ -373,7 +373,7 @@ def fit(self, X, y):
         ValueError
             If `effect` is not one of "linear", "quadratic" or "constant".
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if self.effect not in self._ALLOWED_EFFECTS:
             raise ValueError(f"effect {self.effect} must be in {self._ALLOWED_EFFECTS}")
 
@@ -1054,7 +1054,7 @@ def _prepare_inputs(self, X, sample_weight, y):
         This method is called by `fit` to prepare the inputs for the optimization problem. It adds an intercept column
         to `X` if `fit_intercept=True`, and returns the loss function and its gradient.
         """
-        X, y = validate_data(self, X, y, y_numeric=True)
+        X, y = validate_data(self, X, y, y_numeric=True, y_required=True)
         sample_weight = _check_sample_weight(sample_weight, X)
         self.n_features_in_ = X.shape[1]
 

diff --git a/sklego/meta/confusion_balancer.py b/sklego/meta/confusion_balancer.py
@@ -64,7 +64,7 @@ def fit(self, X, y):
             If the underlying estimator does not have a `predict_proba` method.
         """
 
-        X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if not isinstance(self.estimator, ProbabilisticClassifier):
             raise ValueError(
                 "The ConfusionBalancer meta model only works on classification models with .predict_proba."

diff --git a/sklego/meta/decay_estimator.py b/sklego/meta/decay_estimator.py
@@ -126,7 +126,7 @@ def fit(self, X, y):
         """
 
         if self.check_input:
-            X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0)
+            X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0, y_required=True)
 
         if self.decay_func in self._ALLOWED_DECAYS.keys():
             self.decay_func_ = self._ALLOWED_DECAYS[self.decay_func]

diff --git a/sklego/meta/estimator_transformer.py b/sklego/meta/estimator_transformer.py
@@ -54,7 +54,7 @@ def fit(self, X, y, **kwargs):
         """
 
         if self.check_input:
-            X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True)
+            X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True, y_required=True)
 
         self.multi_output_ = len(y.shape) > 1
         self.estimator_ = clone(self.estimator)

diff --git a/sklego/meta/ordinal_classification.py b/sklego/meta/ordinal_classification.py
@@ -131,7 +131,7 @@ def fit(self, X, y):
         if not hasattr(self.estimator, "predict_proba"):
             raise ValueError("The estimator must implement `.predict_proba()` method.")
 
-        X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True)
+        X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True, y_required=True)
 
         self.classes_ = np.sort(np.unique(y))
         self.n_features_in_ = X.shape[1]

diff --git a/sklego/meta/outlier_classifier.py b/sklego/meta/outlier_classifier.py
@@ -88,7 +88,10 @@ def fit(self, X, y=None):
                 f"Passed model {self.model} does not have a `decision_function` "
                 f"method. This is required for `predict_proba` estimation."
             )
-        X, y = validate_data(self, X, y)
+        if y is not None:
+            X, y = validate_data(self, X, y)
+        else:
+            X = validate_data(self, X)
         self.estimator_ = clone(self.model).fit(X, y)
         self.n_features_in_ = self.estimator_.n_features_in_
         self.classes_ = np.array([0, 1])

diff --git a/sklego/meta/subjective_classifier.py b/sklego/meta/subjective_classifier.py
@@ -111,7 +111,7 @@ def fit(self, X, y):
         if self.evidence not in self._ALLOWED_EVIDENCE:
             raise ValueError(f"Invalid evidence: the provided evidence should be one of {self._ALLOWED_EVIDENCE}")
 
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if set(y) - set(self.prior.keys()):
             raise ValueError(
                 f"Training data is inconsistent with prior: no prior defined for classes "

diff --git a/sklego/meta/zero_inflated_regressor.py b/sklego/meta/zero_inflated_regressor.py
@@ -92,7 +92,7 @@ def fit(self, X, y, sample_weight=None):
         ValueError
             If `classifier` is not a classifier or `regressor` is not a regressor.
         """
-        X, y = validate_data(self, X, y)
+        X, y = validate_data(self, X, y, y_required=True)
 
         self.n_features_in_ = X.shape[1]
 

diff --git a/sklego/mixture/bayesian_gmm_classifier.py b/sklego/mixture/bayesian_gmm_classifier.py
@@ -78,7 +78,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianGMMClassifier":
         self : BayesianGMMClassifier
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if X.ndim == 1:
             X = np.expand_dims(X, 1)
 

diff --git a/sklego/mixture/gmm_classifier.py b/sklego/mixture/gmm_classifier.py
@@ -73,7 +73,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "GMMClassifier":
         self : GMMClassifier
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if X.ndim == 1:
             X = np.expand_dims(X, 1)
 

diff --git a/sklego/naive_bayes.py b/sklego/naive_bayes.py
@@ -74,7 +74,7 @@ def fit(self, X, y) -> "GaussianMixtureNB":
         self : GaussianMixtureNB
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
         if X.ndim == 1:
             X = np.expand_dims(X, 1)
 
@@ -239,7 +239,7 @@ def fit(self, X, y) -> "BayesianGaussianMixtureNB":
         self : BayesianGaussianMixtureNB
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
 
         if X.ndim == 1:
             X = np.expand_dims(X, 1)

diff --git a/sklego/neighbors.py b/sklego/neighbors.py
@@ -63,7 +63,7 @@ def fit(self, X: np.ndarray, y: np.ndarray):
         self : BayesianKernelDensityClassifier
             The fitted estimator.
         """
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
 
         self.classes_ = unique_labels(y)
         self.models_, self.priors_logp_ = {}, {}

diff --git a/sklego/preprocessing/intervalencoder.py b/sklego/preprocessing/intervalencoder.py
@@ -157,7 +157,7 @@ def fit(self, X, y):
 
         # these two matrices will have shape (columns, quantiles)
         # quantiles indicate where the interval split occurs
-        X, y = validate_data(self, X, y)
+        X, y = validate_data(self, X, y, y_required=True)
         self.quantiles_ = np.zeros((X.shape[1], self.n_chunks))
         # heights indicate what heights these intervals will have
         self.heights_ = np.zeros((X.shape[1], self.n_chunks))

diff --git a/sklego/preprocessing/randomadder.py b/sklego/preprocessing/randomadder.py
@@ -68,7 +68,7 @@ def fit(self, X, y):
             The fitted transformer.
         """
         super().fit(X, y)
-        X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
+        X = validate_data(self, X, dtype=FLOAT_DTYPES)
         self.n_features_in_ = X.shape[1]
 
         return self