Skip to content

Commit

Permalink
y_required flag
Browse files (browse the repository at this point in the history)
  • Loading branch information
FBruzzesi committed Nov 20, 2024
1 parent fe730cf commit e555a6c
Show file tree
Hide file tree
Showing 16 changed files with 23 additions and 20 deletions.
2 changes: 1 addition & 1 deletion sklego/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def fit(self, X: np.array, y: np.array) -> "RandomRegressor":
"""
if self.strategy not in self._ALLOWED_STRATEGIES:
raise ValueError(f"strategy {self.strategy} is not in {self._ALLOWED_STRATEGIES}")
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
self.n_features_in_ = X.shape[1]

self.min_ = np.min(y)
Expand Down
2 changes: 1 addition & 1 deletion sklego/feature_selection/mrmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def fit(self, X, y):
k parameter is not integer type or is < n_features_in (X.shape[1]) or < 1
"""
X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True)
X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True, y_required=True)

self._y_dtype = y.dtype

Expand Down
8 changes: 4 additions & 4 deletions sklego/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def fit(self, X, y):
- If `span` is not between 0 and 1.
- If `sigma` is negative.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if self.span is not None:
if not 0 <= self.span <= 1:
raise ValueError(f"Param `span` must be 0 <= span <= 1, got: {self.span}")
Expand Down Expand Up @@ -225,7 +225,7 @@ def fit(self, X, y):
self : ProbWeightRegression
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)

# Construct the problem.
betas = cp.Variable(X.shape[1])
Expand Down Expand Up @@ -373,7 +373,7 @@ def fit(self, X, y):
ValueError
If `effect` is not one of "linear", "quadratic" or "constant".
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if self.effect not in self._ALLOWED_EFFECTS:
raise ValueError(f"effect {self.effect} must be in {self._ALLOWED_EFFECTS}")

Expand Down Expand Up @@ -1054,7 +1054,7 @@ def _prepare_inputs(self, X, sample_weight, y):
This method is called by `fit` to prepare the inputs for the optimization problem. It adds an intercept column
to `X` if `fit_intercept=True`, and returns the loss function and its gradient.
"""
X, y = validate_data(self, X, y, y_numeric=True)
X, y = validate_data(self, X, y, y_numeric=True, y_required=True)
sample_weight = _check_sample_weight(sample_weight, X)
self.n_features_in_ = X.shape[1]

Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/confusion_balancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def fit(self, X, y):
If the underlying estimator does not have a `predict_proba` method.
"""

X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES, y_required=True)
if not isinstance(self.estimator, ProbabilisticClassifier):
raise ValueError(
"The ConfusionBalancer meta model only works on classification models with .predict_proba."
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/decay_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def fit(self, X, y):
"""

if self.check_input:
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0, y_required=True)

if self.decay_func in self._ALLOWED_DECAYS.keys():
self.decay_func_ = self._ALLOWED_DECAYS[self.decay_func]
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/estimator_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def fit(self, X, y, **kwargs):
"""

if self.check_input:
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True, y_required=True)

self.multi_output_ = len(y.shape) > 1
self.estimator_ = clone(self.estimator)
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/ordinal_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def fit(self, X, y):
if not hasattr(self.estimator, "predict_proba"):
raise ValueError("The estimator must implement `.predict_proba()` method.")

X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True)
X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True, y_required=True)

self.classes_ = np.sort(np.unique(y))
self.n_features_in_ = X.shape[1]
Expand Down
5 changes: 4 additions & 1 deletion sklego/meta/outlier_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ def fit(self, X, y=None):
f"Passed model {self.model} does not have a `decision_function` "
f"method. This is required for `predict_proba` estimation."
)
X, y = validate_data(self, X, y)
if y is not None:
X, y = validate_data(self, X, y)
else:
X = validate_data(self, X)
self.estimator_ = clone(self.model).fit(X, y)
self.n_features_in_ = self.estimator_.n_features_in_
self.classes_ = np.array([0, 1])
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/subjective_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def fit(self, X, y):
if self.evidence not in self._ALLOWED_EVIDENCE:
raise ValueError(f"Invalid evidence: the provided evidence should be one of {self._ALLOWED_EVIDENCE}")

X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if set(y) - set(self.prior.keys()):
raise ValueError(
f"Training data is inconsistent with prior: no prior defined for classes "
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/zero_inflated_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def fit(self, X, y, sample_weight=None):
ValueError
If `classifier` is not a classifier or `regressor` is not a regressor.
"""
X, y = validate_data(self, X, y)
X, y = validate_data(self, X, y, y_required=True)

self.n_features_in_ = X.shape[1]

Expand Down
2 changes: 1 addition & 1 deletion sklego/mixture/bayesian_gmm_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianGMMClassifier":
self : BayesianGMMClassifier
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if X.ndim == 1:
X = np.expand_dims(X, 1)

Expand Down
2 changes: 1 addition & 1 deletion sklego/mixture/gmm_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "GMMClassifier":
self : GMMClassifier
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if X.ndim == 1:
X = np.expand_dims(X, 1)

Expand Down
4 changes: 2 additions & 2 deletions sklego/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def fit(self, X, y) -> "GaussianMixtureNB":
self : GaussianMixtureNB
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)
if X.ndim == 1:
X = np.expand_dims(X, 1)

Expand Down Expand Up @@ -239,7 +239,7 @@ def fit(self, X, y) -> "BayesianGaussianMixtureNB":
self : BayesianGaussianMixtureNB
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)

if X.ndim == 1:
X = np.expand_dims(X, 1)
Expand Down
2 changes: 1 addition & 1 deletion sklego/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def fit(self, X: np.ndarray, y: np.ndarray):
self : BayesianKernelDensityClassifier
The fitted estimator.
"""
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True)

self.classes_ = unique_labels(y)
self.models_, self.priors_logp_ = {}, {}
Expand Down
2 changes: 1 addition & 1 deletion sklego/preprocessing/intervalencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def fit(self, X, y):

# these two matrices will have shape (columns, quantiles)
# quantiles indicate where the interval split occurs
X, y = validate_data(self, X, y)
X, y = validate_data(self, X, y, y_required=True)
self.quantiles_ = np.zeros((X.shape[1], self.n_chunks))
# heights indicate what heights these intervals will have
self.heights_ = np.zeros((X.shape[1], self.n_chunks))
Expand Down
2 changes: 1 addition & 1 deletion sklego/preprocessing/randomadder.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def fit(self, X, y):
The fitted transformer.
"""
super().fit(X, y)
X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES)
X = validate_data(self, X, dtype=FLOAT_DTYPES)
self.n_features_in_ = X.shape[1]

return self
Expand Down

0 comments on commit e555a6c

Please sign in to comment.