diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0263c4d1d..5c4218dec 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -115,10 +115,10 @@ jobs: ne(variables['Build.Reason'], 'Schedule') ) matrix: - py38_conda_forge_openblas_ubuntu_1804: + py39_conda_forge_openblas_ubuntu_1804: DISTRIB: 'conda' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' BLAS: 'openblas' COVERAGE: 'false' @@ -188,7 +188,7 @@ jobs: pylatest_conda_tensorflow: DISTRIB: 'conda-latest-tensorflow' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' TEST_DOCS: 'true' TEST_DOCSTRINGS: 'true' CHECK_WARNINGS: 'true' @@ -214,7 +214,7 @@ jobs: pylatest_conda_keras: DISTRIB: 'conda-latest-keras' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.9' TEST_DOCS: 'true' TEST_DOCSTRINGS: 'true' CHECK_WARNINGS: 'true' @@ -301,7 +301,7 @@ jobs: py38_conda_forge_mkl: DISTRIB: 'conda' CONDA_CHANNEL: 'conda-forge' - PYTHON_VERSION: '3.8' + PYTHON_VERSION: '3.10' CHECK_WARNINGS: 'true' PYTHON_ARCH: '64' PYTEST_VERSION: '*' diff --git a/doc/ensemble.rst b/doc/ensemble.rst index 8efd73947..d8b6751d3 100644 --- a/doc/ensemble.rst +++ b/doc/ensemble.rst @@ -33,8 +33,7 @@ data set, this classifier will favor the majority classes:: >>> from sklearn.ensemble import BaggingClassifier >>> from sklearn.tree import DecisionTreeClassifier >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - >>> bc = BaggingClassifier(base_estimator=DecisionTreeClassifier(), - ... random_state=0) + >>> bc = BaggingClassifier(DecisionTreeClassifier(), random_state=0) >>> bc.fit(X_train, y_train) #doctest: BaggingClassifier(...) 
>>> y_pred = bc.predict(X_test) @@ -50,7 +49,7 @@ sampling is controlled by the parameter `sampler` or the two parameters :class:`~imblearn.under_sampling.RandomUnderSampler`:: >>> from imblearn.ensemble import BalancedBaggingClassifier - >>> bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(), + >>> bbc = BalancedBaggingClassifier(DecisionTreeClassifier(), ... sampling_strategy='auto', ... replacement=False, ... random_state=0) diff --git a/doc/whats_new/v0.12.rst b/doc/whats_new/v0.12.rst index 08172f829..1c4325356 100644 --- a/doc/whats_new/v0.12.rst +++ b/doc/whats_new/v0.12.rst @@ -23,9 +23,13 @@ Compatibility - :class:`~imblearn.ensemble.BalancedRandomForestClassifier` now support missing values and monotonic constraints if scikit-learn >= 1.4 is installed. + - :class:`~imblearn.pipeline.Pipeline` support metadata routing if scikit-learn >= 1.4 is installed. +- Compatibility with scikit-learn 1.4. + :pr:`1058` by :user:`Guillaume Lemaitre <glemaitre>`. + Deprecations ............ diff --git a/imblearn/ensemble/_bagging.py b/imblearn/ensemble/_bagging.py index 04d2217b9..afcf3fd3a 100644 --- a/imblearn/ensemble/_bagging.py +++ b/imblearn/ensemble/_bagging.py @@ -5,7 +5,6 @@ # License: MIT import copy -import inspect import numbers import warnings @@ -15,6 +14,7 @@ from sklearn.ensemble import BaggingClassifier from sklearn.ensemble._bagging import _parallel_decision_function from sklearn.ensemble._base import _partition_estimators +from sklearn.exceptions import NotFittedError from sklearn.tree import DecisionTreeClassifier from sklearn.utils import parse_version from sklearn.utils.validation import check_is_fitted @@ -121,14 +121,6 @@ class BalancedBaggingClassifier(_ParamsValidationMixin, BaggingClassifier): .. versionadded:: 0.8 - base_estimator : estimator object, default=None - The base estimator to fit on random subsets of the dataset. - If None, then the base estimator is a decision tree. - - .. 
deprecated:: 0.10 - `base_estimator` was renamed to `estimator` in version 0.10 and - will be removed in 0.12. - Attributes ---------- estimator_ : estimator @@ -136,15 +128,6 @@ class BalancedBaggingClassifier(_ParamsValidationMixin, BaggingClassifier): .. versionadded:: 0.10 - base_estimator_ : estimator - The base estimator from which the ensemble is grown. - - .. deprecated:: 1.2 - `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be - removed in 1.4. Use `estimator_` instead. When the minimum version - of `scikit-learn` supported by `imbalanced-learn` will reach 1.4, - this attribute will be removed. - n_features_ : int The number of features when `fit` is performed. @@ -266,7 +249,7 @@ class BalancedBaggingClassifier(_ParamsValidationMixin, BaggingClassifier): """ # make a deepcopy to not modify the original dictionary - if sklearn_version >= parse_version("1.3"): + if sklearn_version >= parse_version("1.4"): _parameter_constraints = copy.deepcopy(BaggingClassifier._parameter_constraints) else: _parameter_constraints = copy.deepcopy(_bagging_parameter_constraints) @@ -283,6 +266,9 @@ class BalancedBaggingClassifier(_ParamsValidationMixin, BaggingClassifier): "sampler": [HasMethods(["fit_resample"]), None], } ) + # TODO: remove when minimum supported version of scikit-learn is 1.4 + if "base_estimator" in _parameter_constraints: + del _parameter_constraints["base_estimator"] def __init__( self, @@ -301,18 +287,8 @@ def __init__( random_state=None, verbose=0, sampler=None, - base_estimator="deprecated", ): - # TODO: remove when supporting scikit-learn>=1.2 - bagging_classifier_signature = inspect.signature(super().__init__) - estimator_params = {"base_estimator": base_estimator} - if "estimator" in bagging_classifier_signature.parameters: - estimator_params["estimator"] = estimator - else: - self.estimator = estimator - super().__init__( - **estimator_params, n_estimators=n_estimators, max_samples=max_samples, max_features=max_features, @@ -324,6 
+300,7 @@ def __init__( random_state=random_state, verbose=verbose, ) + self.estimator = estimator self.sampling_strategy = sampling_strategy self.replacement = replacement self.sampler = sampler @@ -349,42 +326,17 @@ def _validate_y(self, y): def _validate_estimator(self, default=DecisionTreeClassifier()): """Check the estimator and the n_estimator attribute, set the `estimator_` attribute.""" - if self.estimator is not None and ( - self.base_estimator not in [None, "deprecated"] - ): - raise ValueError( - "Both `estimator` and `base_estimator` were set. Only set `estimator`." - ) - if self.estimator is not None: - base_estimator = clone(self.estimator) - elif self.base_estimator not in [None, "deprecated"]: - warnings.warn( - "`base_estimator` was renamed to `estimator` in version 0.10 and " - "will be removed in 0.12.", - FutureWarning, - ) - base_estimator = clone(self.base_estimator) + estimator = clone(self.estimator) else: - base_estimator = clone(default) + estimator = clone(default) if self.sampler_._sampling_type != "bypass": self.sampler_.set_params(sampling_strategy=self._sampling_strategy) - self._estimator = Pipeline( - [("sampler", self.sampler_), ("classifier", base_estimator)] + self.estimator_ = Pipeline( + [("sampler", self.sampler_), ("classifier", estimator)] ) - try: - # scikit-learn < 1.2 - self.base_estimator_ = self._estimator - except AttributeError: - pass - - # TODO: remove when supporting scikit-learn>=1.4 - @property - def estimator_(self): - """Estimator used to grow the ensemble.""" - return self._estimator # TODO: remove when supporting scikit-learn>=1.2 @property @@ -483,6 +435,22 @@ def decision_function(self, X): return decisions + @property + def base_estimator_(self): + """Attribute for older sklearn version compatibility.""" + error = AttributeError( + f"{self.__class__.__name__} object has no attribute 'base_estimator_'." 
+ ) + if sklearn_version < parse_version("1.2"): + # The base class requires to have the attribute defined. For scikit-learn + # >= 1.2, we are going to raise an error. + try: + check_is_fitted(self) + return self.estimator_ + except NotFittedError: + raise error + raise error + def _more_tags(self): tags = super()._more_tags() tags_key = "_xfail_checks" diff --git a/imblearn/ensemble/_easy_ensemble.py b/imblearn/ensemble/_easy_ensemble.py index e2f5575cb..db3c6cbb7 100644 --- a/imblearn/ensemble/_easy_ensemble.py +++ b/imblearn/ensemble/_easy_ensemble.py @@ -5,7 +5,6 @@ # License: MIT import copy -import inspect import numbers import warnings @@ -15,6 +14,7 @@ from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier from sklearn.ensemble._bagging import _parallel_decision_function from sklearn.ensemble._base import _partition_estimators +from sklearn.exceptions import NotFittedError from sklearn.utils import parse_version from sklearn.utils._tags import _safe_tags from sklearn.utils.validation import check_is_fitted @@ -85,14 +85,6 @@ class EasyEnsembleClassifier(_ParamsValidationMixin, BaggingClassifier): verbose : int, default=0 Controls the verbosity of the building process. - base_estimator : estimator object, default=AdaBoostClassifier() - The base AdaBoost classifier used in the inner ensemble. Note that you - can set the number of inner learner by passing your own instance. - - .. deprecated:: 0.10 - `base_estimator` was renamed to `estimator` in version 0.10 and will - be removed in 0.12. - Attributes ---------- estimator_ : estimator @@ -100,15 +92,6 @@ class EasyEnsembleClassifier(_ParamsValidationMixin, BaggingClassifier): .. versionadded:: 0.10 - base_estimator_ : estimator - The base estimator from which the ensemble is grown. - - .. deprecated:: 1.2 - `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be - removed in 1.4. Use `estimator_` instead. 
When the minimum version - of `scikit-learn` supported by `imbalanced-learn` will reach 1.4, - this attribute will be removed. - estimators_ : list of estimators The collection of fitted base estimators. @@ -191,7 +174,7 @@ class EasyEnsembleClassifier(_ParamsValidationMixin, BaggingClassifier): """ # make a deepcopy to not modify the original dictionary - if sklearn_version >= parse_version("1.3"): + if sklearn_version >= parse_version("1.4"): _parameter_constraints = copy.deepcopy(BaggingClassifier._parameter_constraints) else: _parameter_constraints = copy.deepcopy(_bagging_parameter_constraints) @@ -217,6 +200,9 @@ class EasyEnsembleClassifier(_ParamsValidationMixin, BaggingClassifier): "replacement": ["boolean"], } ) + # TODO: remove when minimum supported version of scikit-learn is 1.4 + if "base_estimator" in _parameter_constraints: + del _parameter_constraints["base_estimator"] def __init__( self, @@ -229,18 +215,8 @@ def __init__( n_jobs=None, random_state=None, verbose=0, - base_estimator="deprecated", ): - # TODO: remove when supporting scikit-learn>=1.2 - bagging_classifier_signature = inspect.signature(super().__init__) - estimator_params = {"base_estimator": base_estimator} - if "estimator" in bagging_classifier_signature.parameters: - estimator_params["estimator"] = estimator - else: - self.estimator = estimator - super().__init__( - **estimator_params, n_estimators=n_estimators, max_samples=1.0, max_features=1.0, @@ -252,6 +228,7 @@ def __init__( random_state=random_state, verbose=verbose, ) + self.estimator = estimator self.sampling_strategy = sampling_strategy self.replacement = replacement @@ -270,46 +247,19 @@ def _validate_y(self, y): self._sampling_strategy = self.sampling_strategy return y_encoded - def _validate_estimator(self, default=AdaBoostClassifier()): + def _validate_estimator(self, default=AdaBoostClassifier(algorithm="SAMME")): """Check the estimator and the n_estimator attribute, set the `estimator_` attribute.""" - if 
self.estimator is not None and ( - self.base_estimator not in [None, "deprecated"] - ): - raise ValueError( - "Both `estimator` and `base_estimator` were set. Only set `estimator`." - ) - if self.estimator is not None: - base_estimator = clone(self.estimator) - elif self.base_estimator not in [None, "deprecated"]: - warnings.warn( - "`base_estimator` was renamed to `estimator` in version 0.10 and " - "will be removed in 0.12.", - FutureWarning, - ) - base_estimator = clone(self.base_estimator) + estimator = clone(self.estimator) else: - base_estimator = clone(default) + estimator = clone(default) sampler = RandomUnderSampler( sampling_strategy=self._sampling_strategy, replacement=self.replacement, ) - self._estimator = Pipeline( - [("sampler", sampler), ("classifier", base_estimator)] - ) - try: - self.base_estimator_ = self._estimator - except AttributeError: - # scikit-learn < 1.2 - pass - - # TODO: remove when supporting scikit-learn>=1.4 - @property - def estimator_(self): - """Estimator used to grow the ensemble.""" - return self._estimator + self.estimator_ = Pipeline([("sampler", sampler), ("classifier", estimator)]) # TODO: remove when supporting scikit-learn>=1.2 @property @@ -399,9 +349,25 @@ def decision_function(self, X): return decisions + @property + def base_estimator_(self): + """Attribute for older sklearn version compatibility.""" + error = AttributeError( + f"{self.__class__.__name__} object has no attribute 'base_estimator_'." + ) + if sklearn_version < parse_version("1.2"): + # The base class requires to have the attribute defined. For scikit-learn + # >= 1.2, we are going to raise an error. 
+ try: + check_is_fitted(self) + return self.estimator_ + except NotFittedError: + raise error + raise error + def _more_tags(self): if self.estimator is None: - estimator = AdaBoostClassifier() + estimator = AdaBoostClassifier(algorithm="SAMME") else: estimator = self.estimator return {"allow_nan": _safe_tags(estimator, "allow_nan")} diff --git a/imblearn/ensemble/_forest.py b/imblearn/ensemble/_forest.py index b8ef60c6e..a7c8f9beb 100644 --- a/imblearn/ensemble/_forest.py +++ b/imblearn/ensemble/_forest.py @@ -334,16 +334,6 @@ class BalancedRandomForestClassifier(_ParamsValidationMixin, RandomForestClassif .. versionadded:: 0.10 - base_estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier` instance - The child estimator template used to create the collection of fitted - sub-estimators. - - .. deprecated:: 1.2 - `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be - removed in 1.4. Use `estimator_` instead. When the minimum version - of `scikit-learn` supported by `imbalanced-learn` will reach 1.4, - this attribute will be removed. - estimators_ : list of :class:`~sklearn.tree.DecisionTreeClassifier` The collection of fitted sub-estimators. @@ -529,15 +519,9 @@ def _validate_estimator(self, default=DecisionTreeClassifier()): base_estimator = self.base_estimator if base_estimator is not None: - self._estimator = clone(base_estimator) + self.estimator_ = clone(base_estimator) else: - self._estimator = clone(default) - - try: - # scikit-learn < 1.2 - self.base_estimator_ = self._estimator - except AttributeError: - pass + self.estimator_ = clone(default) self.base_sampler_ = RandomUnderSampler( sampling_strategy=self._sampling_strategy, @@ -549,7 +533,7 @@ def _make_sampler_estimator(self, random_state=None): Warning: This method should be used to properly instantiate new sub-estimators. 
""" - estimator = clone(self._estimator) + estimator = clone(self.estimator_) estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params}) sampler = clone(self.base_sampler_) @@ -910,12 +894,6 @@ def _compute_oob_predictions(self, X, y): return oob_pred - # TODO: remove when supporting scikit-learn>=1.4 - @property - def estimator_(self): - """Estimator used to grow the ensemble.""" - return self._estimator - # TODO: remove when supporting scikit-learn>=1.2 @property def n_features_(self): diff --git a/imblearn/ensemble/_weight_boosting.py b/imblearn/ensemble/_weight_boosting.py index 7f7965d5e..539b7824f 100644 --- a/imblearn/ensemble/_weight_boosting.py +++ b/imblearn/ensemble/_weight_boosting.py @@ -1,7 +1,5 @@ import copy -import inspect import numbers -import warnings from copy import deepcopy import numpy as np @@ -66,6 +64,10 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): The SAMME.R algorithm typically converges faster than SAMME, achieving a lower test error with fewer boosting iterations. + .. deprecated:: 0.12 + `"SAMME.R"` is deprecated and will be removed in version 0.14. + '"SAMME"' will become the default. + {sampling_strategy} replacement : bool, default=False @@ -73,16 +75,6 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): {random_state} - base_estimator : estimator object, default=None - The base estimator from which the boosted ensemble is built. - Support for sample weighting is required, as well as proper - ``classes_`` and ``n_classes_`` attributes. If ``None``, then - the base estimator is ``DecisionTreeClassifier(max_depth=1)``. - - .. deprecated:: 0.10 - `base_estimator` is deprecated in version 0.10 and will be removed - in 0.12. Use `estimator` instead. - Attributes ---------- estimator_ : estimator @@ -90,15 +82,6 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): .. 
versionadded:: 0.10 - base_estimator_ : estimator - The base estimator from which the ensemble is grown. - - .. deprecated:: 1.2 - `base_estimator_` is deprecated in `scikit-learn` 1.2 and will be - removed in 1.4. Use `estimator_` instead. When the minimum version - of `scikit-learn` supported by `imbalanced-learn` will reach 1.4, - this attribute will be removed. - estimators_ : list of classifiers The collection of fitted sub-estimators. @@ -172,7 +155,7 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): """ # make a deepcopy to not modify the original dictionary - if sklearn_version >= parse_version("1.3"): + if sklearn_version >= parse_version("1.4"): _parameter_constraints = copy.deepcopy( AdaBoostClassifier._parameter_constraints ) @@ -192,6 +175,9 @@ class RUSBoostClassifier(_ParamsValidationMixin, AdaBoostClassifier): "replacement": ["boolean"], } ) + # TODO: remove when minimum supported version of scikit-learn is 1.4 + if "base_estimator" in _parameter_constraints: + del _parameter_constraints["base_estimator"] def __init__( self, @@ -203,23 +189,14 @@ def __init__( sampling_strategy="auto", replacement=False, random_state=None, - base_estimator="deprecated", ): - # TODO: remove when supporting scikit-learn>=1.2 - bagging_classifier_signature = inspect.signature(super().__init__) - estimator_params = {"base_estimator": base_estimator} - if "estimator" in bagging_classifier_signature.parameters: - estimator_params["estimator"] = estimator - else: - self.estimator = estimator - super().__init__( - **estimator_params, n_estimators=n_estimators, learning_rate=learning_rate, algorithm=algorithm, random_state=random_state, ) + self.estimator = estimator self.sampling_strategy = sampling_strategy self.replacement = replacement @@ -257,36 +234,15 @@ def _validate_estimator(self): Sets the `estimator_` attributes. 
""" - if self.estimator is not None and ( - self.base_estimator not in [None, "deprecated"] - ): - raise ValueError( - "Both `estimator` and `base_estimator` were set. Only set `estimator`." - ) - default = DecisionTreeClassifier(max_depth=1) if self.estimator is not None: - base_estimator = clone(self.estimator) - elif self.base_estimator not in [None, "deprecated"]: - warnings.warn( - "`base_estimator` was renamed to `estimator` in version 0.10 and " - "will be removed in 0.12.", - FutureWarning, - ) - base_estimator = clone(self.base_estimator) + self.estimator_ = clone(self.estimator) else: - base_estimator = clone(default) - - self._estimator = base_estimator - try: - # scikit-learn < 1.2 - self.base_estimator_ = self._estimator - except AttributeError: - pass + self.estimator_ = clone(default) # SAMME-R requires predict_proba-enabled estimators if self.algorithm == "SAMME.R": - if not hasattr(self._estimator, "predict_proba"): + if not hasattr(self.estimator_, "predict_proba"): raise TypeError( "AdaBoostClassifier with algorithm='SAMME.R' requires " "that the weak learner supports the calculation of class " @@ -294,9 +250,9 @@ def _validate_estimator(self): "Please change the base estimator or set " "algorithm='SAMME' instead." ) - if not has_fit_parameter(self._estimator, "sample_weight"): + if not has_fit_parameter(self.estimator_, "sample_weight"): raise ValueError( - f"{self._estimator.__class__.__name__} doesn't support sample_weight." + f"{self.estimator_.__class__.__name__} doesn't support sample_weight." ) self.base_sampler_ = RandomUnderSampler( @@ -309,7 +265,7 @@ def _make_sampler_estimator(self, append=True, random_state=None): Warning: This method should be used to properly instantiate new sub-estimators. 
""" - estimator = clone(self._estimator) + estimator = clone(self.estimator_) estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params}) sampler = clone(self.base_sampler_) @@ -437,9 +393,3 @@ def _boost_discrete(self, iboost, X, y, sample_weight, random_state): sample_weight *= np.exp(estimator_weight * incorrect * (sample_weight > 0)) return sample_weight, estimator_weight, estimator_error - - # TODO: remove when supporting scikit-learn>=1.4 - @property - def estimator_(self): - """Estimator used to grow the ensemble.""" - return self._estimator diff --git a/imblearn/ensemble/tests/test_bagging.py b/imblearn/ensemble/tests/test_bagging.py index 01532add8..5705de553 100644 --- a/imblearn/ensemble/tests/test_bagging.py +++ b/imblearn/ensemble/tests/test_bagging.py @@ -592,25 +592,3 @@ def test_balanced_bagging_classifier_n_features(): estimator = BalancedBaggingClassifier().fit(X, y) with pytest.warns(FutureWarning, match="`n_features_` was deprecated"): estimator.n_features_ - - -@pytest.mark.skipif( - sklearn_version < parse_version("1.2"), reason="requires scikit-learn>=1.2" -) -def test_balanced_bagging_classifier_base_estimator(): - """Check that we raise a FutureWarning when accessing `base_estimator_`.""" - X, y = load_iris(return_X_y=True) - estimator = BalancedBaggingClassifier().fit(X, y) - with pytest.warns(FutureWarning, match="`base_estimator_` was deprecated"): - estimator.base_estimator_ - - -def test_balanced_bagging_classifier_set_both_estimator_and_base_estimator(): - """Check that we raise a ValueError when setting both `estimator` and - `base_estimator`.""" - X, y = load_iris(return_X_y=True) - err_msg = "Both `estimator` and `base_estimator` were set. Only set `estimator`." 
- with pytest.raises(ValueError, match=err_msg): - BalancedBaggingClassifier( - estimator=KNeighborsClassifier(), base_estimator=KNeighborsClassifier() - ).fit(X, y) diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index a8574e4d6..7dc04414a 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -43,7 +43,10 @@ @pytest.mark.parametrize("n_estimators", [10, 20]) @pytest.mark.parametrize( "estimator", - [AdaBoostClassifier(n_estimators=5), AdaBoostClassifier(n_estimators=10)], + [ + AdaBoostClassifier(algorithm="SAMME", n_estimators=5), + AdaBoostClassifier(algorithm="SAMME", n_estimators=10), + ], ) def test_easy_ensemble_classifier(n_estimators, estimator): # Check classification for various parameter settings. @@ -89,7 +92,7 @@ def test_estimator(): assert isinstance(ensemble.estimator_.steps[-1][1], AdaBoostClassifier) ensemble = EasyEnsembleClassifier( - 2, AdaBoostClassifier(), n_jobs=-1, random_state=0 + 2, AdaBoostClassifier(algorithm="SAMME"), n_jobs=-1, random_state=0 ).fit(X_train, y_train) assert isinstance(ensemble.estimator_.steps[-1][1], AdaBoostClassifier) @@ -104,7 +107,9 @@ def test_bagging_with_pipeline(): ) estimator = EasyEnsembleClassifier( n_estimators=2, - estimator=make_pipeline(SelectKBest(k=1), AdaBoostClassifier()), + estimator=make_pipeline( + SelectKBest(k=1), AdaBoostClassifier(algorithm="SAMME") + ), ) estimator.fit(X, y).predict(X) @@ -195,7 +200,8 @@ def test_easy_ensemble_classifier_single_estimator(): clf1 = EasyEnsembleClassifier(n_estimators=1, random_state=0).fit(X_train, y_train) clf2 = make_pipeline( - RandomUnderSampler(random_state=0), AdaBoostClassifier(random_state=0) + RandomUnderSampler(random_state=0), + AdaBoostClassifier(algorithm="SAMME", random_state=0), ).fit(X_train, y_train) assert_array_equal(clf1.predict(X_test), clf2.predict(X_test)) @@ -214,7 +220,7 @@ def 
test_easy_ensemble_classifier_grid_search(): "estimator__n_estimators": [3, 4], } grid_search = GridSearchCV( - EasyEnsembleClassifier(estimator=AdaBoostClassifier()), + EasyEnsembleClassifier(estimator=AdaBoostClassifier(algorithm="SAMME")), parameters, cv=5, ) @@ -227,25 +233,3 @@ def test_easy_ensemble_classifier_n_features(): estimator = EasyEnsembleClassifier().fit(X, y) with pytest.warns(FutureWarning, match="`n_features_` was deprecated"): estimator.n_features_ - - -@pytest.mark.skipif( - sklearn_version < parse_version("1.2"), reason="warns for scikit-learn>=1.2" -) -def test_easy_ensemble_classifier_base_estimator(): - """Check that we raise a FutureWarning when accessing `base_estimator_`.""" - X, y = load_iris(return_X_y=True) - estimator = EasyEnsembleClassifier().fit(X, y) - with pytest.warns(FutureWarning, match="`base_estimator_` was deprecated"): - estimator.base_estimator_ - - -def test_easy_ensemble_classifier_set_both_estimator_and_base_estimator(): - """Check that we raise a ValueError when setting both `estimator` and - `base_estimator`.""" - X, y = load_iris(return_X_y=True) - err_msg = "Both `estimator` and `base_estimator` were set. Only set `estimator`." 
- with pytest.raises(ValueError, match=err_msg): - EasyEnsembleClassifier( - estimator=AdaBoostClassifier(), base_estimator=AdaBoostClassifier() - ).fit(X, y) diff --git a/imblearn/ensemble/tests/test_forest.py b/imblearn/ensemble/tests/test_forest.py index 9bd73de65..3719568e5 100644 --- a/imblearn/ensemble/tests/test_forest.py +++ b/imblearn/ensemble/tests/test_forest.py @@ -229,19 +229,6 @@ def test_balanced_bagging_classifier_n_features(): estimator.n_features_ -@pytest.mark.skipif( - sklearn_version < parse_version("1.2"), reason="requires scikit-learn>=1.2" -) -def test_balanced_random_forest_classifier_base_estimator(): - """Check that we raise a FutureWarning when accessing `base_estimator_`.""" - X, y = load_iris(return_X_y=True) - estimator = BalancedRandomForestClassifier( - sampling_strategy="all", replacement=True, bootstrap=False - ).fit(X, y) - with pytest.warns(FutureWarning, match="`base_estimator_` was deprecated"): - estimator.base_estimator_ - - # TODO: remove in 0.13 def test_balanced_random_forest_change_behaviour(imbalanced_dataset): """Check that we raise a change of behaviour for the parameters `sampling_strategy` diff --git a/imblearn/ensemble/tests/test_weight_boosting.py b/imblearn/ensemble/tests/test_weight_boosting.py index a36395e55..ad3dbca04 100644 --- a/imblearn/ensemble/tests/test_weight_boosting.py +++ b/imblearn/ensemble/tests/test_weight_boosting.py @@ -1,9 +1,8 @@ import numpy as np import pytest import sklearn -from sklearn.datasets import load_iris, make_classification +from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier from sklearn.utils._testing import assert_array_equal from sklearn.utils.fixes import parse_version @@ -29,6 +28,7 @@ def imbalanced_dataset(): @pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) +@pytest.mark.filterwarnings("ignore:The SAMME.R algorithm (the default) is") def 
test_rusboost(imbalanced_dataset, algorithm): X, y = imbalanced_dataset X_train, X_test, y_train, y_test = train_test_split( @@ -74,6 +74,7 @@ def test_rusboost(imbalanced_dataset, algorithm): @pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) +@pytest.mark.filterwarnings("ignore:The SAMME.R algorithm (the default) is") def test_rusboost_sample_weight(imbalanced_dataset, algorithm): X, y = imbalanced_dataset sample_weight = np.ones_like(y) @@ -91,25 +92,3 @@ def test_rusboost_sample_weight(imbalanced_dataset, algorithm): with pytest.raises(AssertionError): assert_array_equal(y_pred_no_sample_weight, y_pred_sample_weight) - - -@pytest.mark.skipif( - sklearn_version < parse_version("1.2"), reason="requires scikit-learn>=1.2" -) -def test_rus_boost_classifier_base_estimator(): - """Check that we raise a FutureWarning when accessing `base_estimator_`.""" - X, y = load_iris(return_X_y=True) - estimator = RUSBoostClassifier().fit(X, y) - with pytest.warns(FutureWarning, match="`base_estimator_` was deprecated"): - estimator.base_estimator_ - - -def test_rus_boost_classifier_set_both_estimator_and_base_estimator(): - """Check that we raise a ValueError when setting both `estimator` and - `base_estimator`.""" - X, y = load_iris(return_X_y=True) - err_msg = "Both `estimator` and `base_estimator` were set. Only set `estimator`." 
- with pytest.raises(ValueError, match=err_msg): - RUSBoostClassifier( - estimator=DecisionTreeClassifier(), base_estimator=DecisionTreeClassifier() - ).fit(X, y) diff --git a/imblearn/over_sampling/_smote/tests/test_smoten.py b/imblearn/over_sampling/_smote/tests/test_smoten.py index db9c14f99..2e30e3f19 100644 --- a/imblearn/over_sampling/_smote/tests/test_smoten.py +++ b/imblearn/over_sampling/_smote/tests/test_smoten.py @@ -66,7 +66,7 @@ def test_smoten_sparse_input(data, sparse_format): https://github.com/scikit-learn-contrib/imbalanced-learn/issues/971 """ X, y = data - X = OneHotEncoder().fit_transform(X) + X = OneHotEncoder().fit_transform(X).toarray() X = _convert_container(X, sparse_format) with pytest.warns(DataConversionWarning, match="is not really efficient"): diff --git a/imblearn/tests/test_base.py b/imblearn/tests/test_base.py index 6c16541d4..dc5b705f0 100644 --- a/imblearn/tests/test_base.py +++ b/imblearn/tests/test_base.py @@ -26,9 +26,10 @@ def test_function_sampler_reject_sparse(): X_sparse = sparse.csr_matrix(X) sampler = FunctionSampler(accept_sparse=False) + err_msg = "dense data is required" with pytest.raises( TypeError, - match="A sparse matrix was passed, but dense data is required", + match=err_msg, ): sampler.fit_resample(X_sparse, y) diff --git a/imblearn/tests/test_docstring_parameters.py b/imblearn/tests/test_docstring_parameters.py index 388de5f51..b595d77d7 100644 --- a/imblearn/tests/test_docstring_parameters.py +++ b/imblearn/tests/test_docstring_parameters.py @@ -212,7 +212,11 @@ def test_fit_docstring_attributes(name, Estimator): else: est.fit(X, y) - skipped_attributes = set([]) + skipped_attributes = set( + [ + "base_estimator_", # this attribute exists with old versions of sklearn + ] + ) for attr in attributes: if attr.name in skipped_attributes: diff --git a/imblearn/utils/_param_validation.py b/imblearn/utils/_param_validation.py index 2bd5e8014..3ccabf2eb 100644 --- a/imblearn/utils/_param_validation.py +++ 
b/imblearn/utils/_param_validation.py @@ -6,7 +6,6 @@ import math import operator import re -import warnings from abc import ABC, abstractmethod from collections.abc import Iterable from inspect import signature @@ -22,16 +21,14 @@ sklearn_version = parse_version(sklearn.__version__) -if sklearn_version < parse_version("1.3"): - # TODO: remove `if True` when we have clear support for: - # - ignoring `*args` and `**kwargs` in the signature +if sklearn_version < parse_version("1.4"): class InvalidParameterError(ValueError, TypeError): - """Custom exception to be raised when the parameter of a - class/method/function does not have a valid type or value. + """Custom exception to be raised when the parameter of a class/method/function + does not have a valid type or value. """ - # Inherits from ValueError and TypeError to keep backward compatibility. + # Inherits from ValueError and TypeError to keep backward compatibility. def validate_parameter_constraints(parameter_constraints, params, caller_name): """Validate types and values of given parameters. @@ -56,14 +53,15 @@ def validate_parameter_constraints(parameter_constraints, params, caller_name): - the string "boolean" - the string "verbose" - the string "cv_object" + - the string "nan" - a MissingValues object representing markers for missing values - a HasMethods object, representing method(s) an object must have - a Hidden object, representing a constraint not meant to be exposed to the user params : dict - A dictionary `param_name: param_value`. The parameters to validate - against the constraints. + A dictionary `param_name: param_value`. The parameters to validate against + the constraints. caller_name : str The name of the estimator or function or method that called this function. 
@@ -148,6 +146,8 @@ def make_constraint(constraint): constraint = make_constraint(constraint.constraint) constraint.hidden = True return constraint + if isinstance(constraint, str) and constraint == "nan": + return _NanConstraint() raise ValueError(f"Unknown constraint type: {constraint}") def validate_params(parameter_constraints, *, prefer_skip_nested_validation): @@ -156,12 +156,12 @@ def validate_params(parameter_constraints, *, prefer_skip_nested_validation): Parameters ---------- parameter_constraints : dict - A dictionary `param_name: list of constraints`. See the docstring - of `validate_parameter_constraints` for a description of the - accepted constraints. + A dictionary `param_name: list of constraints`. See the docstring of + `validate_parameter_constraints` for a description of the accepted + constraints. - Note that the *args and **kwargs parameters are not validated and - must not be present in the parameter_constraints dictionary. + Note that the *args and **kwargs parameters are not validated and must not + be present in the parameter_constraints dictionary. prefer_skip_nested_validation : bool If True, the validation of parameters of inner estimators or functions @@ -223,11 +223,10 @@ def wrapper(*args, **kwargs): ): return func(*args, **kwargs) except InvalidParameterError as e: - # When the function is just a wrapper around an estimator, - # we allow the function to delegate validation to the - # estimator, but we replace the name of the estimator by - # the name of the function in the error message to avoid - # confusion. + # When the function is just a wrapper around an estimator, we allow + # the function to delegate validation to the estimator, but we + # replace the name of the estimator by the name of the function in + # the error message to avoid confusion. 
msg = re.sub( r"parameter of \w+ must be", f"parameter of {func.__qualname__} must be", @@ -318,7 +317,11 @@ class _NanConstraint(_Constraint): """Constraint representing the indicator `np.nan`.""" def is_satisfied_by(self, val): - return isinstance(val, Real) and math.isnan(val) + return ( + not isinstance(val, Integral) + and isinstance(val, Real) + and math.isnan(val) + ) def __str__(self): return "numpy.nan" @@ -484,7 +487,7 @@ def _check_params(self): ) def __contains__(self, val): - if np.isnan(val): + if not isinstance(val, Integral) and np.isnan(val): return False left_cmp = operator.lt if self.closed in ("left", "both") else operator.le @@ -586,20 +589,9 @@ def __init__(self): self._constraints = [ _InstancesOf(bool), _InstancesOf(np.bool_), - _InstancesOf(Integral), ] def is_satisfied_by(self, val): - # TODO(1.4) remove support for Integral. - if isinstance(val, Integral) and not isinstance(val, bool): - warnings.warn( - ( - "Passing an int for a boolean parameter is deprecated in " - " version 1.2 and won't be supported anymore in version 1.4." - ), - FutureWarning, - ) - return any(c.is_satisfied_by(val) for c in self._constraints) def __str__(self): @@ -680,8 +672,8 @@ def __str__(self): class HasMethods(_Constraint): """Constraint representing objects that expose specific methods. - It is useful for parameters following a protocol and where we don't - want to impose an affiliation to a specific module or class. + It is useful for parameters following a protocol and where we don't want to + impose an affiliation to a specific module or class. 
Parameters ---------- @@ -931,6 +923,7 @@ def generate_valid_param(constraint): _CVObjects, _InstancesOf, _IterablesNotString, + _NanConstraint, _NoneConstraint, _PandasNAConstraint, _RandomStates, diff --git a/imblearn/utils/tests/test_estimator_checks.py b/imblearn/utils/tests/test_estimator_checks.py index dbc337dd1..ca704f222 100644 --- a/imblearn/utils/tests/test_estimator_checks.py +++ b/imblearn/utils/tests/test_estimator_checks.py @@ -101,7 +101,7 @@ def test_check_samplers_nan(): "BaseBadSampler": (AssertionError, "ValueError not raised by fit"), "SamplerSingleClass": (AssertionError, "Sampler can't balance when only"), "NotFittedSampler": (AssertionError, "No fitted attribute"), - "NoAcceptingSparseSampler": (TypeError, "A sparse matrix was passed"), + "NoAcceptingSparseSampler": (TypeError, "dense data is required"), "NotPreservingDtypeSampler": (AssertionError, "X dtype is not preserved"), } diff --git a/imblearn/utils/tests/test_param_validation.py b/imblearn/utils/tests/test_param_validation.py index 3a7ab65a3..38af6642d 100644 --- a/imblearn/utils/tests/test_param_validation.py +++ b/imblearn/utils/tests/test_param_validation.py @@ -27,6 +27,7 @@ _CVObjects, _InstancesOf, _IterablesNotString, + _NanConstraint, _NoneConstraint, _PandasNAConstraint, _RandomStates, @@ -79,16 +80,41 @@ def fit(self, X=None, y=None): def test_interval_range(interval_type): """Check the range of values depending on closed.""" interval = Interval(interval_type, -2, 2, closed="left") - assert -2 in interval and 2 not in interval + assert -2 in interval + assert 2 not in interval interval = Interval(interval_type, -2, 2, closed="right") - assert -2 not in interval and 2 in interval + assert -2 not in interval + assert 2 in interval interval = Interval(interval_type, -2, 2, closed="both") - assert -2 in interval and 2 in interval + assert -2 in interval + assert 2 in interval interval = Interval(interval_type, -2, 2, closed="neither") - assert -2 not in interval and 2 not in 
interval + assert -2 not in interval + assert 2 not in interval + + +@pytest.mark.parametrize("interval_type", [Integral, Real]) +def test_interval_large_integers(interval_type): + """Check that Interval constraint work with large integers. + + non-regression test for #26648. + """ + interval = Interval(interval_type, 0, 2, closed="neither") + assert 2**65 not in interval + assert 2**128 not in interval + assert float(2**65) not in interval + assert float(2**128) not in interval + + interval = Interval(interval_type, 0, 2**128, closed="neither") + assert 2**65 in interval + assert 2**128 not in interval + assert float(2**65) in interval + assert float(2**128) not in interval + + assert 2**1024 not in interval def test_interval_inf_in_bounds(): @@ -392,8 +418,10 @@ def test_generate_valid_param(constraint): (Real, 0.5), ("boolean", False), ("verbose", 1), + ("nan", np.nan), (MissingValues(), -1), (MissingValues(), -1.0), + (MissingValues(), 2**1028), (MissingValues(), None), (MissingValues(), float("nan")), (MissingValues(), np.nan), @@ -425,10 +453,11 @@ def test_is_satisfied_by(constraint_declaration, value): (MissingValues(numeric_only=True), MissingValues), (HasMethods("fit"), HasMethods), ("cv_object", _CVObjects), + ("nan", _NanConstraint), ], ) def test_make_constraint(constraint_declaration, expected_constraint_class): - """Check that make_constraint dispaches to the appropriate constraint class""" + """Check that make_constraint dispatches to the appropriate constraint class""" constraint = make_constraint(constraint_declaration) assert constraint.__class__ is expected_constraint_class @@ -603,12 +632,6 @@ def f(param): f(True) f(np.bool_(False)) - # an int is also valid but deprecated - with pytest.warns( - FutureWarning, match="Passing an int for a boolean parameter is deprecated" - ): - f(1) - def test_no_validation(): """Check that validation can be skipped for a parameter."""