diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst index ca82c0c0d..708c3657f 100644 --- a/doc/whats_new/v0.10.rst +++ b/doc/whats_new/v0.10.rst @@ -1,5 +1,20 @@ .. _changes_0_10: +Version 0.10.1 +============== + +**December 28, 2022** + +Changelog +--------- + +Bug fixes +......... + +- Fix a regression in over-sampler where the string `minority` was rejected as + an invalid sampling strategy. + :pr:`964` by :user:`Prakhyath Bhandary `. + Version 0.10.0 ============== diff --git a/imblearn/over_sampling/base.py b/imblearn/over_sampling/base.py index d4e4a4541..fbd982bf2 100644 --- a/imblearn/over_sampling/base.py +++ b/imblearn/over_sampling/base.py @@ -61,7 +61,7 @@ class BaseOverSampler(BaseSampler): _parameter_constraints: dict = { "sampling_strategy": [ Interval(numbers.Real, 0, 1, closed="right"), - StrOptions({"auto", "majority", "not minority", "not majority", "all"}), + StrOptions({"auto", "minority", "not minority", "not majority", "all"}), Mapping, callable, ], diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py index 2db808f5b..b72132d19 100644 --- a/imblearn/over_sampling/tests/test_random_over_sampler.py +++ b/imblearn/over_sampling/tests/test_random_over_sampler.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import ( _convert_container, assert_allclose, @@ -255,3 +256,20 @@ def test_random_over_sampler_shrinkage_error(data, shrinkage, err_msg): ros = RandomOverSampler(shrinkage=shrinkage) with pytest.raises(ValueError, match=err_msg): ros.fit_resample(X, y) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "minority", "not minority", "not majority", "all"] +) +def test_random_over_sampler_strings(sampling_strategy): + """Check that we support all supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseOverSampler`.""" + + X, y = 
make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + RandomOverSampler(sampling_strategy=sampling_strategy).fit_resample(X, y) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py index 2e845e83a..bcb8682e2 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_random_under_sampler.py @@ -7,6 +7,7 @@ import numpy as np import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import assert_array_equal from imblearn.under_sampling import RandomUnderSampler @@ -130,3 +131,20 @@ def test_random_under_sampling_nan_inf(): assert y_res.shape == (6,) assert X_res.shape == (6, 2) assert np.any(~np.isfinite(X_res)) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"] +) +def test_random_under_sampler_strings(sampling_strategy): + """Check that we support all supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseUnderSampler`.""" + + X, y = make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + RandomUnderSampler(sampling_strategy=sampling_strategy).fit_resample(X, y) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py index cd169393c..5fd837866 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_tomek_links.py @@ -4,6 +4,8 @@ # License: MIT import numpy as np +import pytest +from sklearn.datasets import make_classification from sklearn.utils._testing import assert_array_equal from 
imblearn.under_sampling import TomekLinks @@ -68,3 +70,20 @@ def test_tl_fit_resample(): y_gt = np.array([1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]) assert_array_equal(X_resampled, X_gt) assert_array_equal(y_resampled, y_gt) + + +@pytest.mark.parametrize( + "sampling_strategy", ["auto", "majority", "not minority", "not majority", "all"] +) +def test_tomek_links_strings(sampling_strategy): + """Check that we support all supposed strings as `sampling_strategy` in + a sampler inheriting from `BaseCleaningSampler`.""" + + X, y = make_classification( + n_samples=100, + n_clusters_per_class=1, + n_classes=3, + weights=[0.1, 0.3, 0.6], + random_state=0, + ) + TomekLinks(sampling_strategy=sampling_strategy).fit_resample(X, y)