From 520c69feffa72b8989b1c6b14aef45d739bb99e5 Mon Sep 17 00:00:00 2001
From: Frits Hermans <post@fritshermans.nl>
Date: Fri, 31 Jan 2025 16:20:59 +0100
Subject: [PATCH 01/18] implement InstanceHardnessCV

---
 imblearn/cross_validation/__init__.py         |  3 +
 .../cross_validation/_cross_validation.py     | 68 +++++++++++++++++++
 imblearn/cross_validation/tests/__init__.py   |  0
 .../tests/test_instance_hardness.py           | 30 ++++++++
 4 files changed, 101 insertions(+)
 create mode 100644 imblearn/cross_validation/__init__.py
 create mode 100644 imblearn/cross_validation/_cross_validation.py
 create mode 100644 imblearn/cross_validation/tests/__init__.py
 create mode 100644 imblearn/cross_validation/tests/test_instance_hardness.py

diff --git a/imblearn/cross_validation/__init__.py b/imblearn/cross_validation/__init__.py
new file mode 100644
index 000000000..b6f646989
--- /dev/null
+++ b/imblearn/cross_validation/__init__.py
@@ -0,0 +1,3 @@
+from ._cross_validation import InstanceHardnessCV
+
+__all__ = ["InstanceHardnessCV"]
diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
new file mode 100644
index 000000000..b5ea76667
--- /dev/null
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import StratifiedGroupKFold, cross_val_predict
+
+
+class InstanceHardnessCV:
+    """Instance-hardness CV splitter
+
+    CV splitter that distributes samples with large instance hardness equally
+    over the folds
+
+    Parameters
+    ----------
+    n_splits : int, default=5
+        Number of folds. Must be at least 2.
+
+    clf : classifier, default=None
+        Classifier used to determine instance hardness. Defaults to
+        RandomForestClassifier when set to `None`
+
+    random_state : int, RandomState instance, default=None
+        Determines random_state for reproducible results across multiple calls.
+
+    Examples
+    --------
+    >>> from imblearn.cross_validation import InstanceHardnessCV
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.model_selection import cross_validate
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> X, y = make_classification(weights=[0.9, 0.1], class_sep=2,
+    ... n_informative=3, n_redundant=1, flip_y=0.05, n_samples=1000, random_state=10)
+    >>> ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
+    >>> clf = LogisticRegression(random_state=10)
+    >>> cv_result = cross_validate(clf, X, y, cv=ih_cv)
+    >>> print(f"Standard deviation of test_scores: {cv_result['test_score'].std():.3f}")
+    Standard deviation of test_scores: 0.005
+    """
+
+    def __init__(self, n_splits=5, clf=None, random_state=None):
+        self.n_splits = n_splits
+        self.clf = clf
+        self.random_state = random_state
+
+    def split(self, X, y, groups=None):
+        df = pd.DataFrame(X)
+        features = df.columns
+        df["y"] = y
+        if self.clf is not None:
+            self.clf_ = self.clf
+        else:
+            self.clf_ = RandomForestClassifier(
+                n_jobs=-1, class_weight="balanced", random_state=self.random_state
+            )
+        df["proba"] = cross_val_predict(
+            self.clf_, df[features], df["y"], cv=self.n_splits, method="predict_proba"
+        )[:, 1]
+        df["hardness"] = abs(df["y"] - df["proba"])
+        df = df.sort_values("hardness")
+        df["group"] = np.arange(len(df)) % self.n_splits
+        cv = StratifiedGroupKFold(
+            n_splits=self.n_splits, shuffle=True, random_state=self.random_state
+        )
+        for train_index, test_index in cv.split(df[features], df["y"], df["group"]):
+            yield train_index, test_index
+
+    def get_n_splits(self, X=None, y=None, groups=None):
+        return self.n_splits
diff --git a/imblearn/cross_validation/tests/__init__.py b/imblearn/cross_validation/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/imblearn/cross_validation/tests/test_instance_hardness.py b/imblearn/cross_validation/tests/test_instance_hardness.py
new file mode 100644
index 000000000..7e3b91fed
--- /dev/null
+++ b/imblearn/cross_validation/tests/test_instance_hardness.py
@@ -0,0 +1,30 @@
+import pytest
+from sklearn.datasets import make_classification
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import cross_validate
+from sklearn.utils._testing import assert_almost_equal
+
+from imblearn.cross_validation import InstanceHardnessCV
+
+X, y = make_classification(
+    weights=[0.9, 0.1],
+    class_sep=2,
+    n_informative=3,
+    n_redundant=1,
+    flip_y=0.05,
+    n_samples=1000,
+    random_state=10,
+)
+
+
+def test_instancehardness_cv():
+    ih_cv = InstanceHardnessCV()
+    clf = LogisticRegression(random_state=10)
+    cv_result = cross_validate(clf, X, y, cv=ih_cv)
+    assert_almost_equal(cv_result["test_score"].std(), 0.005, decimal=3)
+
+
+@pytest.mark.parametrize("n_splits", [2, 3, 4])
+def test_instancehardness_cv_n_splits(n_splits):
+    ih_cv = InstanceHardnessCV(n_splits=n_splits, random_state=10)
+    assert ih_cv.get_n_splits() == n_splits

From f54f0352a4f86c8a84e946abbf6e64dc382cfc20 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 1 Feb 2025 13:15:40 +0100
Subject: [PATCH 02/18] add documentation

---
 doc/references/index.rst                      |   1 +
 examples/cross_validation/README.txt          |   6 +
 .../plot_instance_hardness_cv.py              | 105 ++++++++++++++++++
 3 files changed, 112 insertions(+)
 create mode 100644 examples/cross_validation/README.txt
 create mode 100644 examples/cross_validation/plot_instance_hardness_cv.py

diff --git a/doc/references/index.rst b/doc/references/index.rst
index f5fe3bf53..be102052a 100644
--- a/doc/references/index.rst
+++ b/doc/references/index.rst
@@ -18,5 +18,6 @@ This is the full API documentation of the `imbalanced-learn` toolbox.
    miscellaneous
    pipeline
    metrics
+   cross_validation
    datasets
    utils
diff --git a/examples/cross_validation/README.txt b/examples/cross_validation/README.txt
new file mode 100644
index 000000000..ee39d15c3
--- /dev/null
+++ b/examples/cross_validation/README.txt
@@ -0,0 +1,6 @@
+.. _cross_validation_examples:
+
+Example using cross validation classes
+======================================
+
+Cross validation classes to be used for classification problems with imbalanced class distributions.
diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
new file mode 100644
index 000000000..a48a39d6a
--- /dev/null
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -0,0 +1,105 @@
+"""
+===================================================
+Distribute hard-to-classify datapoint over CV folds
+===================================================
+
+'Instance hardness' refers to the difficulty to classify an instance. The way
+hard-to-classify instances are distributed over train and test sets has
+significant effect on the test set performance metrics. In this example we
+show how to deal with this problem. We are making the comparison with normal
+StratifiedKFold cv splitting.
+"""
+
+# Authors: Frits Hermans, https://fritshermans.github.io
+# License: MIT
+
+# %%
+print(__doc__)
+
+# %% [markdown]
+# Create an imbalanced dataset with instance hardness
+# ---------------------------------------------------
+#
+# We will create an imbalanced dataset with using scikit-learn's `make_blobs`
+# function and the `make_imbalance` function. The imbalancedness is set to
+# 0.1; only 10% of the labels is positive.
+
+
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.datasets import make_blobs
+
+from imblearn.datasets import make_imbalance
+
+X, y = make_blobs(n_samples=1000, centers=((-3, 0), (3, 0)), random_state=10)
+
+
+# %%
+def sampling_strategy(ratio):
+    def strategy(y):
+        return {0: sum(y), 1: int(ratio * sum(y) / (1 - ratio))}
+
+    return strategy
+
+
+X, y = make_imbalance(X, y, sampling_strategy=sampling_strategy(0.1), random_state=10)
+plt.scatter(X[:, 0], X[:, 1], c=y)
+plt.show()
+
+# %%
+# To introduce instance hardness in our dataset, we flip the labels at the
+# boundaries of the feature space
+y[np.argsort(X[:, 0])[:5]] = 1
+y[np.argsort(X[:, 0])[-5:]] = 0
+plt.scatter(X[:, 0], X[:, 1], c=y)
+plt.show()
+
+# %% [markdown]
+# Compare cross validation scores using StratifiedKFold and InstanceHardnessCV
+# ----------------------------------------------------------------------------
+#
+# We calculate cross validation scores using `cross_validate` and a
+# `LogisticRegression` classifier. We compare the results using a
+# `StratifiedKFold` cv splitter and an `InstanceHardnessCV` splitter.
+# As we are dealing with an imbalanced classification problem, we
+# use `average_precision` for scoring.
+
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import StratifiedKFold, cross_validate
+
+from imblearn.cross_validation import InstanceHardnessCV
+
+# %%
+clf = LogisticRegression()
+
+# %%
+skf_cv = StratifiedKFold(n_splits=5)
+skf_result = cross_validate(clf, X, y, cv=skf_cv, scoring="average_precision")
+
+# %%
+ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
+ih_result = cross_validate(clf, X, y, cv=ih_cv)
+
+# %%
+# The boxplot below shows that the `InstanceHardnessCV` splitter results
+# in less variation of average precision than `StratifiedKFold` splitter.
+# When doing hyperparameter tuning or feature selection using a wrapper
+# method (like `RFECV`) this will give more stable results.
+
+import pandas as pd
+
+ax = (
+    pd.concat(
+        (pd.DataFrame(skf_result), pd.DataFrame(ih_result)),
+        axis=1,
+        keys=["StratifiedKFold", "InstanceHardnessCV"],
+    )
+    .swaplevel(axis="columns")["test_score"]
+    .plot.box(
+        color={"whiskers": "black", "medians": "black", "caps": "black"}, vert=False
+    )
+)
+plt.xlabel("Average precision")
+_ = plt.title("Test score via cross-validation")
+plt.tight_layout()
+plt.show()

From 4b12063a21aa9d6c4473d26747a15cc6aa64ef36 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 1 Feb 2025 13:24:22 +0100
Subject: [PATCH 03/18] add cross_validation.rst

---
 doc/references/cross_validation.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 doc/references/cross_validation.rst

diff --git a/doc/references/cross_validation.rst b/doc/references/cross_validation.rst
new file mode 100644
index 000000000..3e8889407
--- /dev/null
+++ b/doc/references/cross_validation.rst
@@ -0,0 +1,23 @@
+.. _cross_validation_ref:
+
+Cross validation methods
+========================
+
+.. automodule:: imblearn.cross_validation
+    :no-members:
+    :no-inherited-members:
+
+CV splitters
+--------------------
+
+.. automodule:: imblearn.cross_validation._cross_validation
+   :no-members:
+   :no-inherited-members:
+
+.. currentmodule:: imblearn.cross_validation
+
+.. autosummary::
+   :toctree: generated/
+   :template: class.rst
+
+   InstanceHardnessCV

From 0e98d1f94cfff17bce536c9724ea196ef143a061 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 1 Feb 2025 15:45:30 +0100
Subject: [PATCH 04/18] fix plot_instance_hardness_cv.py

---
 examples/cross_validation/plot_instance_hardness_cv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index a48a39d6a..6c9c393ef 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -78,7 +78,7 @@ def strategy(y):
 
 # %%
 ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
-ih_result = cross_validate(clf, X, y, cv=ih_cv)
+ih_result = cross_validate(clf, X, y, cv=ih_cv, scoring="average_precision")
 
 # %%
 # The boxplot below shows that the `InstanceHardnessCV` splitter results

From 2ceea7f371b7cd2caaa4c94cd1781e6cdb3a63c7 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sun, 2 Feb 2025 10:12:07 +0100
Subject: [PATCH 05/18] add initial documentation

---
 doc/cross_validation.rst                       | 18 ++++++++++++++++++
 doc/user_guide.rst                             |  1 +
 imblearn/cross_validation/_cross_validation.py |  2 ++
 3 files changed, 21 insertions(+)
 create mode 100644 doc/cross_validation.rst

diff --git a/doc/cross_validation.rst b/doc/cross_validation.rst
new file mode 100644
index 000000000..58c0300a6
--- /dev/null
+++ b/doc/cross_validation.rst
@@ -0,0 +1,18 @@
+.. _cross_validation:
+
+================
+Cross validation
+================
+
+.. currentmodule:: imblearn.cross_validation
+
+
+.. _instance_hardness_threshold:
+
+The term instance hardness is used in literature to express the difficulty to
+correctly classify an instance. An instance for which the predicted probability
+of the true class is low, has large instance hardness. The way these
+hard-to-classify instances are distributed over train and test sets in cross
+validation, has significant effect on the test set performance metrics. The
+`InstanceHardnessCV` splitter distributes samples with large instance hardness
+equally over the folds, resulting in more robust cross validation.
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
index bfa8c00f9..5bb1be673 100644
--- a/doc/user_guide.rst
+++ b/doc/user_guide.rst
@@ -19,6 +19,7 @@ User Guide
    ensemble.rst
    miscellaneous.rst
    metrics.rst
+   cross_validation.rst
    common_pitfalls.rst
    Dataset loading utilities <datasets/index.rst>
    developers_utils.rst
diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index b5ea76667..a4c4f8f5d 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -10,6 +10,8 @@ class InstanceHardnessCV:
     CV splitter that distributes samples with large instance hardness equally
     over the folds
 
+    Read more in the :ref:`User Guide <instance_hardness_threshold>`.
+
     Parameters
     ----------
     n_splits : int, default=5

From 7ef85ff0d4e687acc8c288dc5d9ef4b3a513ec79 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sun, 2 Feb 2025 10:18:06 +0100
Subject: [PATCH 06/18] add docstrings

---
 .../cross_validation/_cross_validation.py     | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index a4c4f8f5d..1dfc6a8a8 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -45,6 +45,31 @@ def __init__(self, n_splits=5, clf=None, random_state=None):
         self.random_state = random_state
 
     def split(self, X, y, groups=None):
+        """
+        Generate indices to split data into training and test set.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        y : array-like of shape (n_samples,)
+            The target variable.
+
+        groups : object
+            Always ignored, exists for compatibility.
+
+        Yields
+        ------
+
+        train : ndarray
+            The training set indices for that split.
+
+        test : ndarray
+            The testing set indices for that split.
+
+        """
         df = pd.DataFrame(X)
         features = df.columns
         df["y"] = y
@@ -67,4 +92,24 @@ def split(self, X, y, groups=None):
             yield train_index, test_index
 
     def get_n_splits(self, X=None, y=None, groups=None):
+        """
+        Returns the number of splitting iterations in the cross-validator.
+
+        Parameters
+        ----------
+        X : object
+            Always ignored, exists for compatibility.
+
+        y : object
+            Always ignored, exists for compatibility.
+
+        groups : object
+            Always ignored, exists for compatibility.
+
+        Returns
+        -------
+        n_splits : int
+            Returns the number of splitting iterations in the cross-validator.
+
+        """
         return self.n_splits

From cc611e221649965c680c3eaf1b4d1dcfffbc1314 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 08:58:08 +0100
Subject: [PATCH 07/18] fix random seed in unit test

---
 imblearn/cross_validation/tests/test_instance_hardness.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/imblearn/cross_validation/tests/test_instance_hardness.py b/imblearn/cross_validation/tests/test_instance_hardness.py
index 7e3b91fed..a53e7d7f2 100644
--- a/imblearn/cross_validation/tests/test_instance_hardness.py
+++ b/imblearn/cross_validation/tests/test_instance_hardness.py
@@ -1,8 +1,9 @@
 import pytest
+
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import cross_validate
-from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
 
 from imblearn.cross_validation import InstanceHardnessCV
 
@@ -18,10 +19,10 @@
 
 
 def test_instancehardness_cv():
-    ih_cv = InstanceHardnessCV()
+    ih_cv = InstanceHardnessCV(random_state=10)
     clf = LogisticRegression(random_state=10)
     cv_result = cross_validate(clf, X, y, cv=ih_cv)
-    assert_almost_equal(cv_result["test_score"].std(), 0.005, decimal=3)
+    assert_array_equal(cv_result['test_score'], [0.965, 0.965, 0.96, 0.965, 0.955])
 
 
 @pytest.mark.parametrize("n_splits", [2, 3, 4])

From f0c03bb742c0ae974645addcb8d327ba914383e9 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 09:06:54 +0100
Subject: [PATCH 08/18] refactor the way groups are assigned by instance
 hardness in InstanceHardnessCV

---
 .../cross_validation/_cross_validation.py     | 24 ++++++++-----------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index 1dfc6a8a8..a5e930676 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -1,7 +1,6 @@
 import numpy as np
-import pandas as pd
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import StratifiedGroupKFold, cross_val_predict
+from sklearn.model_selection import LeaveOneGroupOut, cross_val_predict
 
 
 class InstanceHardnessCV:
@@ -70,25 +69,22 @@ def split(self, X, y, groups=None):
             The testing set indices for that split.
 
         """
-        df = pd.DataFrame(X)
-        features = df.columns
-        df["y"] = y
         if self.clf is not None:
             self.clf_ = self.clf
         else:
             self.clf_ = RandomForestClassifier(
                 n_jobs=-1, class_weight="balanced", random_state=self.random_state
             )
-        df["proba"] = cross_val_predict(
-            self.clf_, df[features], df["y"], cv=self.n_splits, method="predict_proba"
-        )[:, 1]
-        df["hardness"] = abs(df["y"] - df["proba"])
-        df = df.sort_values("hardness")
-        df["group"] = np.arange(len(df)) % self.n_splits
-        cv = StratifiedGroupKFold(
-            n_splits=self.n_splits, shuffle=True, random_state=self.random_state
+        probas = cross_val_predict(
+            self.clf_, X, y, cv=self.n_splits, method="predict_proba"
         )
-        for train_index, test_index in cv.split(df[features], df["y"], df["group"]):
+        # by sorting first on y then on proba rows are ordered by instance hardness
+        # within the group having the same label
+        sorted_indices = np.lexsort((probas[:, 1], y))
+        groups = np.zeros(len(X), dtype=int)
+        groups[sorted_indices] = np.arange(len(X)) % self.n_splits
+        cv = LeaveOneGroupOut()
+        for train_index, test_index in cv.split(X, y, groups):
             yield train_index, test_index
 
     def get_n_splits(self, X=None, y=None, groups=None):

From 018df65a6c329c0cc42d79366bae2f1b8e5880dd Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 09:13:28 +0100
Subject: [PATCH 09/18] simplify plotting code in plot_instance_hardness_cv.py

---
 .../plot_instance_hardness_cv.py               | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index 6c9c393ef..dcd0144c0 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -86,20 +86,8 @@ def strategy(y):
 # When doing hyperparameter tuning or feature selection using a wrapper
 # method (like `RFECV`) this will give more stable results.
 
-import pandas as pd
-
-ax = (
-    pd.concat(
-        (pd.DataFrame(skf_result), pd.DataFrame(ih_result)),
-        axis=1,
-        keys=["StratifiedKFold", "InstanceHardnessCV"],
-    )
-    .swaplevel(axis="columns")["test_score"]
-    .plot.box(
-        color={"whiskers": "black", "medians": "black", "caps": "black"}, vert=False
-    )
-)
-plt.xlabel("Average precision")
-_ = plt.title("Test score via cross-validation")
+# %%
+plt.boxplot([skf_result['test_score'], ih_result['test_score']],
+            labels=["StratifiedKFold", "InstanceHardnessCV"], vert=False)
 plt.tight_layout()
 plt.show()

From 2fdca6f202b9a09da70557231c3b1830da934f9b Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 09:18:45 +0100
Subject: [PATCH 10/18] update docstring

---
 imblearn/cross_validation/_cross_validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index a5e930676..db60f0b05 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -35,7 +35,7 @@ class InstanceHardnessCV:
     >>> clf = LogisticRegression(random_state=10)
     >>> cv_result = cross_validate(clf, X, y, cv=ih_cv)
     >>> print(f"Standard deviation of test_scores: {cv_result['test_score'].std():.3f}")
-    Standard deviation of test_scores: 0.005
+    Standard deviation of test_scores: 0.004
     """
 
     def __init__(self, n_splits=5, clf=None, random_state=None):

From 0ce2eb3bc4705a6439a21d213ed1a3c623fc465a Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 09:21:09 +0100
Subject: [PATCH 11/18] update 'labels' to 'tick_labels' in boxplot code

---
 examples/cross_validation/plot_instance_hardness_cv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index dcd0144c0..3fa2c1dc6 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -88,6 +88,6 @@ def strategy(y):
 
 # %%
 plt.boxplot([skf_result['test_score'], ih_result['test_score']],
-            labels=["StratifiedKFold", "InstanceHardnessCV"], vert=False)
+            tick_labels=["StratifiedKFold", "InstanceHardnessCV"], vert=False)
 plt.tight_layout()
 plt.show()

From a394cf2c4bdccf2e1d1fbef63cf05682a6f0939d Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 09:50:52 +0100
Subject: [PATCH 12/18] rename clf to estimator

---
 imblearn/cross_validation/_cross_validation.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index db60f0b05..f4c2b24d1 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -16,7 +16,7 @@ class InstanceHardnessCV:
     n_splits : int, default=5
         Number of folds. Must be at least 2.
 
-    clf : classifier, default=None
+    estimator : classifier, default=None
         Classifier used to determine instance hardness. Defaults to
         RandomForestClassifier when set to `None`
 
@@ -32,15 +32,15 @@ class InstanceHardnessCV:
     >>> X, y = make_classification(weights=[0.9, 0.1], class_sep=2,
     ... n_informative=3, n_redundant=1, flip_y=0.05, n_samples=1000, random_state=10)
     >>> ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
-    >>> clf = LogisticRegression(random_state=10)
-    >>> cv_result = cross_validate(clf, X, y, cv=ih_cv)
+    >>> estimator = LogisticRegression(random_state=10)
+    >>> cv_result = cross_validate(estimator, X, y, cv=ih_cv)
     >>> print(f"Standard deviation of test_scores: {cv_result['test_score'].std():.3f}")
     Standard deviation of test_scores: 0.004
     """
 
-    def __init__(self, n_splits=5, clf=None, random_state=None):
+    def __init__(self, n_splits=5, estimator=None, random_state=None):
         self.n_splits = n_splits
-        self.clf = clf
+        self.estimator = estimator
         self.random_state = random_state
 
     def split(self, X, y, groups=None):
@@ -69,14 +69,14 @@ def split(self, X, y, groups=None):
             The testing set indices for that split.
 
         """
-        if self.clf is not None:
-            self.clf_ = self.clf
+        if self.estimator is not None:
+            self.estimator_ = self.estimator
         else:
-            self.clf_ = RandomForestClassifier(
+            self.estimator_ = RandomForestClassifier(
                 n_jobs=-1, class_weight="balanced", random_state=self.random_state
             )
         probas = cross_val_predict(
-            self.clf_, X, y, cv=self.n_splits, method="predict_proba"
+            self.estimator_, X, y, cv=self.n_splits, method="predict_proba"
         )
         # by sorting first on y then on proba rows are ordered by instance hardness
         # within the group having the same label

From d06c5802b38326674d01becc8f1dd211b3f4bd6e Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 10:38:53 +0100
Subject: [PATCH 13/18] change data generation in plot_instance_hardness_cv.py

---
 .../plot_instance_hardness_cv.py              | 34 ++++++-------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index 3fa2c1dc6..1192052ad 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -21,36 +21,24 @@
 # ---------------------------------------------------
 #
 # We will create an imbalanced dataset with using scikit-learn's `make_blobs`
-# function and the `make_imbalance` function. The imbalancedness is set to
-# 0.1; only 10% of the labels is positive.
+# function and set the imbalancedness to 5%; only 5% of the labels are positive.
 
 
 import numpy as np
 from matplotlib import pyplot as plt
 from sklearn.datasets import make_blobs
 
-from imblearn.datasets import make_imbalance
-
-X, y = make_blobs(n_samples=1000, centers=((-3, 0), (3, 0)), random_state=10)
-
-
-# %%
-def sampling_strategy(ratio):
-    def strategy(y):
-        return {0: sum(y), 1: int(ratio * sum(y) / (1 - ratio))}
-
-    return strategy
-
-
-X, y = make_imbalance(X, y, sampling_strategy=sampling_strategy(0.1), random_state=10)
+X, y = make_blobs(n_samples=[950,50], centers=((-3, 0), (3, 0)), random_state=10)
 plt.scatter(X[:, 0], X[:, 1], c=y)
 plt.show()
 
 # %%
-# To introduce instance hardness in our dataset, we flip the labels at the
-# boundaries of the feature space
-y[np.argsort(X[:, 0])[:5]] = 1
-y[np.argsort(X[:, 0])[-5:]] = 0
+# To introduce instance hardness in our dataset, we add some hard to classify samples:
+X_hard, y_hard = make_blobs(n_samples=10, centers=((3, 0), (-3, 0)),
+                            cluster_std=1,
+                            random_state=10)
+X = np.vstack((X, X_hard))
+y = np.hstack((y, y_hard))
 plt.scatter(X[:, 0], X[:, 1], c=y)
 plt.show()
 
@@ -70,14 +58,14 @@ def strategy(y):
 from imblearn.cross_validation import InstanceHardnessCV
 
 # %%
-clf = LogisticRegression()
+clf = LogisticRegression(random_state=10)
 
 # %%
-skf_cv = StratifiedKFold(n_splits=5)
+skf_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)
 skf_result = cross_validate(clf, X, y, cv=skf_cv, scoring="average_precision")
 
 # %%
-ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
+ih_cv = InstanceHardnessCV(n_splits=5, estimator=clf, random_state=10)
 ih_result = cross_validate(clf, X, y, cv=ih_cv, scoring="average_precision")
 
 # %%

From 38509ddef88c94ba5ff32965b09a764952b1007c Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 10:43:48 +0100
Subject: [PATCH 14/18] describe InstanceHardnessCV in User Guide

---
 doc/cross_validation.rst | 67 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/doc/cross_validation.rst b/doc/cross_validation.rst
index 58c0300a6..e9692e328 100644
--- a/doc/cross_validation.rst
+++ b/doc/cross_validation.rst
@@ -16,3 +16,70 @@ hard-to-classify instances are distributed over train and test sets in cross
 validation, has significant effect on the test set performance metrics. The
 `InstanceHardnessCV` splitter distributes samples with large instance hardness
 equally over the folds, resulting in more robust cross validation.
+
+We will discuss instance hardness in this document and explain how to use the
+`InstanceHardnessCV` splitter.
+
+Instance hardness and average precision
+=======================================
+
+Let’s start by creating a dataset to work with. We create a dataset with 5% class
+imbalance using scikit-learn’s `make_blobs` function.
+
+  >>> import numpy as np
+  >>> from matplotlib import pyplot as plt
+  >>> from sklearn.datasets import make_blobs
+  >>> from imblearn.datasets import make_imbalance
+  >>> random_state = 10
+  >>> X, y = make_blobs(n_samples=[950, 50], centers=((-3, 0), (3, 0)),
+  ...                   random_state=random_state)
+  >>> plt.scatter(X[:, 0], X[:, 1], c=y)
+  >>> plt.show()
+
+.. image:: ./auto_examples/cross_validation/images/sphx_glr_plot_instance_hardness_cv_001.png
+   :target: ./auto_examples/cross_validation/plot_instance_hardness_cv.html
+   :align: center
+
+Now we add some samples with large instance hardness
+
+  >>> X_hard, y_hard = make_blobs(n_samples=10, centers=((3, 0), (-3, 0)),
+  ...                             cluster_std=1,
+  ...                             random_state=random_state)
+  >>> X = np.vstack((X, X_hard))
+  >>> y = np.hstack((y, y_hard))
+  >>> plt.scatter(X[:, 0], X[:, 1], c=y)
+  >>> plt.show()
+
+.. image:: ./auto_examples/cross_validation/images/sphx_glr_plot_instance_hardness_cv_002.png
+   :target: ./auto_examples/cross_validation/plot_instance_hardness_cv.html
+   :align: center
+
+Then we take a `LogisticRegression` classifier and assess the cross validation
+performance using a `StratifiedKFold` cv splitter and the `cross_validate`
+function.
+
+  >>> from sklearn.linear_model import LogisticRegression
+  >>> clf = LogisticRegression(random_state=random_state)
+  >>> skf_cv = StratifiedKFold(n_splits=5, shuffle=True,
+  ...                           random_state=random_state)
+  >>> skf_result = cross_validate(clf, X, y, cv=skf_cv, scoring="average_precision")
+
+Now, we do the same using an `InstanceHardnessCV` splitter. We provide our
+classifier to the splitter to calculate instance hardness and distribute samples
+with large instance hardness equally over the folds.
+
+  >>> ih_cv = InstanceHardnessCV(n_splits=5, estimator=clf,
+  ...                               random_state=random_state)
+  >>> ih_result = cross_validate(clf, X, y, cv=ih_cv, scoring="average_precision")
+
+When we plot the test scores for both cv splitters, we see that the variance using
+the `InstanceHardnessCV` splitter is lower than for the `StratifiedKFold` splitter.
+
+  >>> plt.boxplot([skf_result['test_score'], ih_result['test_score']],
+  ...               tick_labels=["StratifiedKFold", "InstanceHardnessCV"],
+  ...               vert=False)
+  >>> plt.tight_layout()
+
+.. image:: ./auto_examples/cross_validation/images/sphx_glr_plot_instance_hardness_cv_003.png
+   :target: ./auto_examples/cross_validation/plot_instance_hardness_cv.html
+   :align: center
\ No newline at end of file

From aded9e991752de07bcf99f18d75ac99745c37beb Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Wed, 26 Mar 2025 13:22:33 +0100
Subject: [PATCH 15/18] add x label to boxplot

---
 examples/cross_validation/plot_instance_hardness_cv.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index 1192052ad..8b4858439 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -77,5 +77,6 @@
 # %%
 plt.boxplot([skf_result['test_score'], ih_result['test_score']],
             tick_labels=["StratifiedKFold", "InstanceHardnessCV"], vert=False)
+plt.xlabel('Average precision')
 plt.tight_layout()
 plt.show()

From 4647a2b9b34c1426050dd077ec94bf15ef5c68f2 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 29 Mar 2025 17:17:36 +0100
Subject: [PATCH 16/18] fix typo

---
 examples/cross_validation/plot_instance_hardness_cv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index 8b4858439..cd708bb01 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -1,7 +1,7 @@
 """
-===================================================
-Distribute hard-to-classify datapoint over CV folds
-===================================================
+====================================================
+Distribute hard-to-classify datapoints over CV folds
+====================================================
 
 'Instance hardness' refers to the difficulty to classify an instance. The way
 hard-to-classify instances are distributed over train and test sets has

From 636dc5b3cc2691bc75e24bac0a011a94f2fea083 Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 29 Mar 2025 17:17:57 +0100
Subject: [PATCH 17/18] explain instance hardness in user guide

---
 doc/cross_validation.rst | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/doc/cross_validation.rst b/doc/cross_validation.rst
index e9692e328..0c51e3e9b 100644
--- a/doc/cross_validation.rst
+++ b/doc/cross_validation.rst
@@ -22,6 +22,36 @@ We will discuss instance hardness in this document and explain how to use the
 
 Instance hardness and average precision
 =======================================
+Instance hardness is defined as 1 minus the probability of the most probable class:
+
+.. math::
+
+   H(x) = 1 - P(\hat{y}|x)
+
+In this equation :math:`H(x)` is the instance hardness for a sample with features
+:math:`x` and :math:`P(\hat{y}|x)` the probability of predicted label :math:`\hat{y}`
+given the features. If the model predicts label 0 and gives a `predict_proba` output
+of [0.9, 0.1], the probability of the most probable class (0) is 0.9 and the
+instance hardness is 1-0.9=0.1.
+
+Samples with large instance hardness have a significant effect on the area under
+the precision-recall curve, or average precision. In particular, samples with label
+0 and large instance hardness (so the model predicts label 1) reduce the average
+precision a lot, as these points affect the precision-recall curve on the left,
+where the area is largest; the precision is lowered in the range of low recall
+and high thresholds. When doing cross validation, e.g. for hyperparameter
+tuning or recursive feature elimination, a random gathering of these points in
+some folds introduces variance in CV results that deteriorates the robustness of
+the cross validation task. The `InstanceHardnessCV`
+splitter aims to distribute the samples with large instance hardness over the
+folds in order to reduce undesired variance. Note that one should use this
+splitter to make model *selection* tasks such as hyperparameter tuning and
+feature selection robust, but not for model *performance estimation*, for which
+you also want to know the variance of performance to be expected in production.
+
+
+Create imbalanced dataset with samples with large instance hardness
+===================================================================
 
 Let’s start by creating a dataset to work with. We create a dataset with 5% class
 imbalance using scikit-learn’s `make_blobs` function.
@@ -54,6 +84,9 @@ Now we add some samples with large instance hardness
    :target: ./auto_examples/cross_validation/plot_instance_hardness_cv.html
    :align: center
 
+Assess cross validation performance variance using InstanceHardnessCV splitter
+==============================================================================
+
 Then we take a `LogisticRegressionClassifier` and assess the cross validation
 performance using a `StratifiedKFold` cv splitter and the `cross_validate`
 function.
@@ -78,6 +111,7 @@ the `InstanceHardnessCV` splitter is lower than for the `StratifiedKFold` splitt
   >>> plt.boxplot([skf_result['test_score'], ih_result['test_score']],
   ...               tick_labels=["StratifiedKFold", "InstanceHardnessCV"],
   ...               vert=False)
+  >>> plt.xlabel('Average precision')
   >>> plt.tight_layout()
 
 .. image:: ./auto_examples/cross_validation/images/sphx_glr_plot_instance_hardness_cv_003.png

From 1c642cebf04e34e795a9d35804da18598c9f809d Mon Sep 17 00:00:00 2001
From: fritshermans <post@fritshermans.nl>
Date: Sat, 29 Mar 2025 17:27:43 +0100
Subject: [PATCH 18/18] remove default random forest as estimator for
 InstanceHardnessCV

---
 doc/cross_validation.rst                             |  2 +-
 .../cross_validation/plot_instance_hardness_cv.py    |  2 +-
 imblearn/cross_validation/_cross_validation.py       | 12 ++++++------
 .../cross_validation/tests/test_instance_hardness.py |  7 ++++---
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/doc/cross_validation.rst b/doc/cross_validation.rst
index 0c51e3e9b..7759a91b8 100644
--- a/doc/cross_validation.rst
+++ b/doc/cross_validation.rst
@@ -101,7 +101,7 @@ Now, we do the same using an `InstanceHardnessCV` splitter. We use provide our
 classifier to the splitter to calculate instance hardness and distribute samples
 with large instance hardness equally over the folds.
 
-  >>> ih_cv = InstanceHardnessCV(n_splits=5, estimator=clf,
+  >>> ih_cv = InstanceHardnessCV(estimator=clf, n_splits=5,
   ...                               random_state=random_state)
   >>> ih_result = cross_validate(clf, X, y, cv=ih_cv, scoring="average_precision")
 
diff --git a/examples/cross_validation/plot_instance_hardness_cv.py b/examples/cross_validation/plot_instance_hardness_cv.py
index cd708bb01..5e7a2202c 100644
--- a/examples/cross_validation/plot_instance_hardness_cv.py
+++ b/examples/cross_validation/plot_instance_hardness_cv.py
@@ -65,7 +65,7 @@
 skf_result = cross_validate(clf, X, y, cv=skf_cv, scoring="average_precision")
 
 # %%
-ih_cv = InstanceHardnessCV(n_splits=5, estimator=clf, random_state=10)
+ih_cv = InstanceHardnessCV(estimator=clf, n_splits=5, random_state=10)
 ih_result = cross_validate(clf, X, y, cv=ih_cv, scoring="average_precision")
 
 # %%
diff --git a/imblearn/cross_validation/_cross_validation.py b/imblearn/cross_validation/_cross_validation.py
index f4c2b24d1..c8808ebc6 100644
--- a/imblearn/cross_validation/_cross_validation.py
+++ b/imblearn/cross_validation/_cross_validation.py
@@ -13,13 +13,13 @@ class InstanceHardnessCV:
 
     Parameters
     ----------
+    estimator : estimator object
+        Classifier to be used to estimate instance hardness of the samples.
+        This classifier should implement `predict_proba`.
+
     n_splits : int, default=5
         Number of folds. Must be at least 2.
 
-    estimator : classifier, default=None
-        Classifier used to determine instance hardness. Defaults to
-        RandomForestClassifier when set to `None`
-
     random_state : int, RandomState instance, default=None
         Determines random_state for reproducible results across multiple calls.
 
@@ -31,14 +31,14 @@ class InstanceHardnessCV:
     >>> from sklearn.linear_model import LogisticRegression
     >>> X, y = make_classification(weights=[0.9, 0.1], class_sep=2,
     ... n_informative=3, n_redundant=1, flip_y=0.05, n_samples=1000, random_state=10)
-    >>> ih_cv = InstanceHardnessCV(n_splits=5, random_state=10)
     >>> estimator = LogisticRegression(random_state=10)
+    >>> ih_cv = InstanceHardnessCV(estimator=estimator, n_splits=5, random_state=10)
     >>> cv_result = cross_validate(estimator, X, y, cv=ih_cv)
     >>> print(f"Standard deviation of test_scores: {cv_result['test_score'].std():.3f}")
     Standard deviation of test_scores: 0.004
     """
 
-    def __init__(self, n_splits=5, estimator=None, random_state=None):
+    def __init__(self, estimator, n_splits=5, random_state=None):
         self.n_splits = n_splits
         self.estimator = estimator
         self.random_state = random_state
diff --git a/imblearn/cross_validation/tests/test_instance_hardness.py b/imblearn/cross_validation/tests/test_instance_hardness.py
index a53e7d7f2..096c9259b 100644
--- a/imblearn/cross_validation/tests/test_instance_hardness.py
+++ b/imblearn/cross_validation/tests/test_instance_hardness.py
@@ -19,13 +19,14 @@
 
 
 def test_instancehardness_cv():
-    ih_cv = InstanceHardnessCV(random_state=10)
     clf = LogisticRegression(random_state=10)
+    ih_cv = InstanceHardnessCV(estimator=clf, random_state=10)
     cv_result = cross_validate(clf, X, y, cv=ih_cv)
-    assert_array_equal(cv_result['test_score'], [0.965, 0.965, 0.96, 0.965, 0.955])
+    assert_array_equal(cv_result['test_score'], [0.975, 0.965, 0.96,  0.955, 0.965])
 
 
 @pytest.mark.parametrize("n_splits", [2, 3, 4])
 def test_instancehardness_cv_n_splits(n_splits):
-    ih_cv = InstanceHardnessCV(n_splits=n_splits, random_state=10)
+    clf = LogisticRegression(random_state=10)
+    ih_cv = InstanceHardnessCV(estimator=clf, n_splits=n_splits, random_state=10)
     assert ih_cv.get_n_splits() == n_splits