From c30791bfb3512f6bf0c8dbcfda85c9a700464890 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 20 Sep 2021 11:16:05 +0200 Subject: [PATCH 001/106] API: remove custom blank line at end of simulation output --- src/facet/simulation/viz/_style.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py index d1f92912..fc4af71a 100644 --- a/src/facet/simulation/viz/_style.py +++ b/src/facet/simulation/viz/_style.py @@ -373,12 +373,6 @@ def draw_histogram( ) ) - def finalize_drawing(self, **kwargs: Any) -> None: - """[see superclass]""" - super().finalize_drawing(**kwargs) - # print two trailing line breaks - self.out.write("\n") - @staticmethod def _partition_format(is_categorical: bool) -> str: if is_categorical: From 8529981137f3543ba42e03b6a8fad46dfdb365ec Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 20 Sep 2021 11:50:45 +0200 Subject: [PATCH 002/106] API: return LearnerInspector matrix outputs as Matrix instances --- src/facet/inspection/_inspection.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index d80f67d1..4fe9f99c 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -11,6 +11,7 @@ from scipy.spatial.distance import squareform from pytools.api import AllTracker, inheritdoc +from pytools.data import Matrix from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin from pytools.viz.dendrogram import LinkageTree @@ -491,7 +492,7 @@ def feature_synergy_matrix( symmetrical: bool = False, aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature synergy matrix. @@ -550,7 +551,7 @@ def feature_redundancy_matrix( symmetrical: bool = False, aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature redundancy matrix. @@ -609,7 +610,7 @@ def feature_association_matrix( symmetrical: bool = False, aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature association matrix. 
@@ -911,7 +912,7 @@ def __feature_affinity_matrix( affinity_matrices: List[pd.DataFrame], affinity_symmetrical: np.ndarray, clustered: bool, - ): + ) -> Matrix: if clustered: affinity_matrices = LearnerInspector.__sort_affinity_matrices( affinity_matrices=affinity_matrices, @@ -1053,11 +1054,11 @@ def _ensure_shap_interaction(self) -> None: @staticmethod def __isolate_single_frame( frames: List[pd.DataFrame], - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: if len(frames) == 1: - return frames[0] + return Matrix.from_frame(frames[0]) else: - return frames + return list(map(Matrix.from_frame, frames)) @staticmethod def __validate_aggregation_method(aggregation: str) -> None: From acbe4c34f4b566e7daa273d5b7f404a963a1ca20 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 20 Sep 2021 12:58:45 +0200 Subject: [PATCH 003/106] API: move class LinkageTree to module pytools.data --- .../source/tutorial/Classification_with_Facet.ipynb | 12 ++++++------ src/facet/inspection/_inspection.py | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sphinx/source/tutorial/Classification_with_Facet.ipynb b/sphinx/source/tutorial/Classification_with_Facet.ipynb index 31cdbdff..28887433 100644 --- a/sphinx/source/tutorial/Classification_with_Facet.ipynb +++ b/sphinx/source/tutorial/Classification_with_Facet.ipynb @@ -203,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pytools.viz.dendrogram import DendrogramDrawer, LinkageTree\n", + "from pytools.viz.dendrogram import DendrogramDrawer\n", "from pytools.viz.matrix import MatrixDrawer" ] }, @@ -3152,10 +3152,10 @@ "evalue": "name 'TableOne' is not defined", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 18\u001b[0m ]\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m mytable = TableOne(\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mprediab_eda\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprediab_eda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Pre_diab\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'TableOne' is not defined" + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 18\u001B[0m ]\n\u001B[1;32m 19\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 20\u001B[0;31m mytable = TableOne(\n\u001B[0m\u001B[1;32m 21\u001B[0m \u001B[0mprediab_eda\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 22\u001B[0m 
\u001B[0mcolumns\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mprediab_eda\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcolumns\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdrop\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Pre_diab\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mto_list\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mNameError\u001B[0m: name 'TableOne' is not defined" ] } ], @@ -3294,4 +3294,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 4fe9f99c..5da8f83f 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -11,10 +11,9 @@ from scipy.spatial.distance import squareform from pytools.api import AllTracker, inheritdoc -from pytools.data import Matrix +from pytools.data import LinkageTree, Matrix from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin -from pytools.viz.dendrogram import LinkageTree from sklearndf import ClassifierDF, LearnerDF, RegressorDF from sklearndf.pipeline import LearnerPipelineDF From 11060f018915addde0e7e1d30fc1af5717704770 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 20 Sep 2021 13:01:51 +0200 Subject: [PATCH 004/106] TEST: update unit tests for new Matrix class --- test/test/facet/test_inspection.py | 38 +++++++++++----------- test/test/facet/test_shap_decomposition.py | 14 +++----- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 32fdea44..4ddca500 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -230,7 +230,7 @@ def test_model_inspection_classifier_binary( association_matrix = model_inspector.feature_association_matrix( clustered=True, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert association_matrix.data == pytest.approx( np.array( [ [1.000, 0.692, 0.195, 0.052], @@ -334,7 +334,7 @@ def test_model_inspection_classifier_multi_class( clustered=False ) - assert np.hstack([m.values for m in synergy_matrix]) == pytest.approx( + assert np.hstack([m.data for m in synergy_matrix]) == pytest.approx( np.array( [ [1.000, 0.009, 0.057, 0.055, 1.000, 0.042] @@ -353,7 +353,7 @@ def test_model_inspection_classifier_multi_class( redundancy_matrix = iris_inspector_multi_class.feature_redundancy_matrix( clustered=False ) - assert np.hstack([m.values for m in redundancy_matrix]) == ( + assert np.hstack([m.data for m in redundancy_matrix]) == ( pytest.approx( np.array( [ @@ -374,7 +374,7 @@ def test_model_inspection_classifier_multi_class( association_matrix = iris_inspector_multi_class.feature_association_matrix( clustered=False ) - assert np.hstack([m.values for m in association_matrix]) == ( + assert np.hstack([m.data for m in association_matrix]) == ( pytest.approx( np.array( [ @@ -550,7 +550,7 @@ def test_model_inspection_classifier_interaction( synergy_matrix = model_inspector.feature_synergy_matrix( clustered=False, symmetrical=True ) - assert synergy_matrix.values == pytest.approx( + assert synergy_matrix.data == pytest.approx( np.array( [ [1.000, 0.011, 0.006, 0.007], @@ -563,7 +563,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_synergy_matrix( absolute=True, symmetrical=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 
0.001, 0.002, 0.001], @@ -576,7 +576,7 @@ def test_model_inspection_classifier_interaction( ) synergy_matrix = model_inspector.feature_synergy_matrix(clustered=True) - assert synergy_matrix.values == pytest.approx( + assert synergy_matrix.data == pytest.approx( np.array( [ [1.000, 0.000, 0.001, 0.004], @@ -589,7 +589,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_synergy_matrix( absolute=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 0.000, 0.000, 0.001], @@ -602,7 +602,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector_full_sample.feature_synergy_matrix( clustered=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [1.000, 0.000, 0.000, 0.001], @@ -617,7 +617,7 @@ def test_model_inspection_classifier_interaction( redundancy_matrix = model_inspector.feature_redundancy_matrix( clustered=False, symmetrical=True ) - assert redundancy_matrix.values == pytest.approx( + assert redundancy_matrix.data == pytest.approx( np.array( [ [1.000, 0.080, 0.316, 0.208], @@ -630,7 +630,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_redundancy_matrix( absolute=True, symmetrical=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 0.316, 0.052, 0.010], @@ -643,7 +643,7 @@ def test_model_inspection_classifier_interaction( ) redundancy_matrix = model_inspector.feature_redundancy_matrix(clustered=True) - assert redundancy_matrix.values == pytest.approx( + assert redundancy_matrix.data == pytest.approx( np.array( [ [1.000, 0.691, 0.209, 0.045], @@ -656,7 +656,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_redundancy_matrix( absolute=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 0.294, 0.092, 0.020], @@ -670,7 +670,7 @@ def test_model_inspection_classifier_interaction( assert model_inspector_full_sample.feature_redundancy_matrix( clustered=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [1.000, 0.677, 0.384, 0.003], @@ -685,7 +685,7 @@ def test_model_inspection_classifier_interaction( association_matrix = model_inspector.feature_association_matrix( clustered=False, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert association_matrix.data == pytest.approx( np.array( [ [1.000, 0.074, 0.309, 0.205], @@ -698,7 +698,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_association_matrix( absolute=True, symmetrical=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 0.317, 0.051, 0.009], @@ -711,7 +711,7 @@ def test_model_inspection_classifier_interaction( ) association_matrix = model_inspector.feature_association_matrix(clustered=True) - assert association_matrix.values == pytest.approx( + assert association_matrix.data == pytest.approx( np.array( [ [1.000, 0.694, 0.205, 0.040], @@ -724,7 +724,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_association_matrix( absolute=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [0.425, 0.295, 0.090, 0.018], @@ -738,7 +738,7 @@ def test_model_inspection_classifier_interaction( assert model_inspector_full_sample.feature_association_matrix( clustered=True - ).values == pytest.approx( + ).data == pytest.approx( np.array( [ [1.000, 0.678, 0.383, 0.001], diff --git a/test/test/facet/test_shap_decomposition.py 
b/test/test/facet/test_shap_decomposition.py index f2591cd6..2c0734df 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -32,17 +32,13 @@ def test_shap_decomposition_matrices( ): matrix_full_name = f"feature {matrix_name} matrix" n_features = len(feature_names) - assert len(matrix) == n_features, f"rows in {matrix_full_name}" - assert len(matrix.columns) == n_features, f"columns in {matrix_full_name}" + assert matrix.data.shape[0] == n_features, f"rows in {matrix_full_name}" + assert matrix.data.shape[1] == n_features, f"columns in {matrix_full_name}" # check values - for c in matrix.columns: - assert ( - 0.0 - <= matrix.fillna(0).loc[:, c].min() - <= matrix.fillna(0).loc[:, c].max() - <= 1.0 - ), f"Values of [0.0, 1.0] in {matrix_full_name}" + assert ( + matrix.data.min() >= 0.0 and matrix.data.max() <= 1.0 + ), f"Values of [0.0, 1.0] in {matrix_full_name}" # From 46f1b2a6b8747612179b85916c130accb371534a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 20 Sep 2021 13:39:30 +0200 Subject: [PATCH 005/106] API: annotate affinity matrices with weights and axis labels --- src/facet/inspection/_inspection.py | 58 ++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 5da8f83f..4e1168c3 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -540,6 +540,7 @@ def feature_synergy_matrix( affinity_symmetrical=explainer.synergy( symmetrical=True, absolute=False, std=False ), + affinity_metric="synergy", clustered=clustered, ) @@ -599,6 +600,7 @@ def feature_redundancy_matrix( affinity_symmetrical=explainer.redundancy( symmetrical=True, absolute=False, std=False ), + affinity_metric="redundancy", clustered=clustered, ) @@ -662,6 +664,7 @@ def feature_association_matrix( affinity_symmetrical=global_explainer.association( symmetrical=True, absolute=False, std=False ), + affinity_metric="association", clustered=clustered, ) @@ -906,18 +909,21 @@ def __feature_matrix_to_df( for m in matrix ] - @staticmethod def __feature_affinity_matrix( + self, affinity_matrices: List[pd.DataFrame], affinity_symmetrical: np.ndarray, + affinity_metric: str, clustered: bool, ) -> Matrix: if clustered: - affinity_matrices = LearnerInspector.__sort_affinity_matrices( + affinity_matrices = self.__sort_affinity_matrices( affinity_matrices=affinity_matrices, symmetrical_affinity_matrices=affinity_symmetrical, ) - return LearnerInspector.__isolate_single_frame(affinity_matrices) + return self.__isolate_single_frame( + affinity_matrices, affinity_metric=affinity_metric + ) @staticmethod def __sort_affinity_matrices( @@ -1050,14 +1056,54 @@ def _ensure_shap_interaction(self) -> None: "enable calculations involving SHAP interaction values." 
) - @staticmethod def __isolate_single_frame( + self, frames: List[pd.DataFrame], + affinity_metric: str, ) -> Union[Matrix, List[Matrix]]: + feature_importance = self.feature_importance() + if len(frames) == 1: - return Matrix.from_frame(frames[0]) + assert isinstance(feature_importance, pd.Series) + return self.__frame_to_matrix( + frames[0], + affinity_metric=affinity_metric, + feature_importance=feature_importance, + ) else: - return list(map(Matrix.from_frame, frames)) + return [ + self.__frame_to_matrix( + frame, + affinity_metric=affinity_metric, + feature_importance=frame_importance, + feature_importance_category=str(frame_name), + ) + for frame, (frame_name, frame_importance) in zip( + frames, feature_importance.items() + ) + ] + + @staticmethod + def __frame_to_matrix( + frame: pd.DataFrame, + *, + affinity_metric: str, + feature_importance: pd.Series, + feature_importance_category: Optional[str] = None, + ) -> Matrix: + return Matrix.from_frame( + frame, + weights=( + feature_importance.reindex(frame.index), + feature_importance.reindex(frame.columns), + ), + name_labels=("feature", "dependent feature"), + weight_label=( + f"{affinity_metric} ({feature_importance_category})" + if feature_importance_category + else affinity_metric + ), + ) @staticmethod def __validate_aggregation_method(aggregation: str) -> None: From 5f7a6c07bd62c9e86baca20f389fbe66e4efbc9e Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 22 Sep 2021 17:25:46 +0200 Subject: [PATCH 006/106] API: feature/dependent feature --> primary feature/associated feature --- src/facet/inspection/_inspection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 4e1168c3..f26c9fff 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -1097,7 +1097,7 @@ def __frame_to_matrix( feature_importance.reindex(frame.index), feature_importance.reindex(frame.columns), ), - name_labels=("feature", "dependent feature"), + name_labels=("primary feature", "associated feature"), weight_label=( f"{affinity_metric} ({feature_importance_category})" if feature_importance_category From f67a3391e50492bb16f7145334405c92cdcab84a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 22 Sep 2021 22:34:49 +0200 Subject: [PATCH 007/106] VIZ: set color of axis labels to foreground color --- src/facet/simulation/viz/_style.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py index fc4af71a..048a3617 100644 --- a/src/facet/simulation/viz/_style.py +++ b/src/facet/simulation/viz/_style.py @@ -144,15 +144,18 @@ def draw_uplift( x = range(len(partitions)) else: x = partitions + + # get axes and color scheme ax = self.ax + colors = self.colors # plot the confidence bounds and the median - (line_min,) = ax.plot(x, outputs_lower_bound, color=self.colors.accent_3) - (line_median,) = ax.plot(x, outputs_median, color=self.colors.accent_2) - (line_max,) = ax.plot(x, outputs_upper_bound, color=self.colors.accent_3) + (line_min,) = ax.plot(x, outputs_lower_bound, color=colors.accent_3) + (line_median,) = ax.plot(x, outputs_median, color=colors.accent_2) + (line_max,) = ax.plot(x, outputs_upper_bound, color=colors.accent_3) # add a horizontal line at the baseline - line_base = ax.axhline(y=baseline, linewidth=0.5, color=self.colors.accent_1) + line_base = ax.axhline(y=baseline, linewidth=0.5, color=colors.accent_1) # add a legend 
labels = self._legend(confidence_level=confidence_level) @@ -160,7 +163,7 @@ def draw_uplift( ax.legend(handles, labels) # label the y axis - ax.set_ylabel(output_unit) + ax.set_ylabel(output_unit, color=colors.foreground) # format and label the x axis ax.tick_params( From c0880d302d2458f00ecac912421354a5f7a3feb9 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 23 Sep 2021 14:44:15 +0200 Subject: [PATCH 008/106] DOC: update release notes --- RELEASE_NOTES.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 558a41e8..699ab228 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,6 +1,17 @@ Release Notes ============= +FACET 2.0 +--------- + +2.0.0 +~~~~~ + +- API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances +- VIZ: minor tweaks to simulation plots and reports generated by + :class:`.SimulationDrawer` + + FACET 1.2 --------- From 5cace36d87321a060ecaa5c51c1eef885789e232 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 23 Sep 2021 14:58:17 +0200 Subject: [PATCH 009/106] API: order feature linkage leaves for minimal neighbour distance --- RELEASE_NOTES.rst | 7 +++++++ src/facet/inspection/_inspection.py | 32 +++++++++++++++-------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 699ab228..b3076715 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -8,6 +8,13 @@ FACET 2.0 ~~~~~ - API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances +- API: the leaf order of :class:`.LinkageTree` objects generated by + `feature_…_linkage` methods of :class:`.LearnerInspector` is now the same as the + row and column order of :class:`.Matrix` objects returned by the corresponding + `feature_…_matrix` methods of :class:`.LearnerInspector`, minimizing the distance + between adjacent leaves. 
+ The old sorting behaviour of FACET 1.x can be restored using method + :meth:`.LinkageTree.sort_by_weight` - VIZ: minor tweaks to simulation plots and reports generated by :class:`.SimulationDrawer` diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index f26c9fff..ab93295d 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -934,22 +934,14 @@ def __sort_affinity_matrices( fn_linkage = LearnerInspector.__linkage_matrix_from_affinity_matrix_for_output return [ - affinity_matrix.iloc[feature_order, feature_order] + (lambda feature_order: affinity_matrix.iloc[feature_order, feature_order])( + feature_order=leaves_list( + Z=fn_linkage(feature_affinity_matrix=symmetrical_affinity_matrix) + ) + ) for affinity_matrix, symmetrical_affinity_matrix in zip( affinity_matrices, symmetrical_affinity_matrices ) - for feature_order in ( - leaves_list( - Z=optimal_leaf_ordering( - Z=fn_linkage( - feature_affinity_matrix=symmetrical_affinity_matrix - ), - y=symmetrical_affinity_matrix, - ) - ) - # reverse the index list so larger values tend to end up on top - [::-1], - ) ] @staticmethod @@ -1043,10 +1035,20 @@ def __linkage_matrix_from_affinity_matrix_for_output( # (1 = closest, 0 = most distant) # compress the distance matrix (required by SciPy) - compressed_distance_vector = squareform(1 - abs(feature_affinity_matrix)) + compressed_distance_matrix: np.ndarray = squareform( + 1 - abs(feature_affinity_matrix) + ) # calculate the linkage matrix - return linkage(y=compressed_distance_vector, method="single") + leaf_ordering: np.ndarray = optimal_leaf_ordering( + Z=linkage(y=compressed_distance_matrix, method="single"), + y=compressed_distance_matrix, + ) + + # reverse the leaf ordering, so that larger values tend to end up on top + leaf_ordering[:, [1, 0]] = leaf_ordering[:, [0, 1]] + + return leaf_ordering def _ensure_shap_interaction(self) -> None: if not self._shap_interaction: From 15a3a22257a4f99e69c87ad18393c90c7a20f2ed Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 27 Sep 2021 13:14:52 +0200 Subject: [PATCH 010/106] API: rename Matrix.data to .values, and .weight_label to .value_label --- src/facet/inspection/_inspection.py | 4 +-- test/test/facet/test_inspection.py | 38 +++++++++++----------- test/test/facet/test_shap_decomposition.py | 6 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index ab93295d..e8e9f939 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -1099,12 +1099,12 @@ def __frame_to_matrix( feature_importance.reindex(frame.index), feature_importance.reindex(frame.columns), ), - name_labels=("primary feature", "associated feature"), - weight_label=( + value_label=( f"{affinity_metric} ({feature_importance_category})" if feature_importance_category else affinity_metric ), + name_labels=("primary feature", "associated feature"), ) @staticmethod diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 853221b5..bcfa99b3 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -175,7 +175,7 @@ def test_model_inspection_classifier_binary( association_matrix = model_inspector.feature_association_matrix( clustered=True, symmetrical=True ) - assert association_matrix.data == pytest.approx( + assert association_matrix.values == pytest.approx( np.array( [ [1.000, 0.692, 0.195, 0.052], @@ -279,7 +279,7 @@ def 
test_model_inspection_classifier_multi_class( clustered=False ) - assert np.hstack([m.data for m in synergy_matrix]) == pytest.approx( + assert np.hstack([m.values for m in synergy_matrix]) == pytest.approx( np.array( [ [1.000, 0.009, 0.057, 0.055, 1.000, 0.042] @@ -298,7 +298,7 @@ def test_model_inspection_classifier_multi_class( redundancy_matrix = iris_inspector_multi_class.feature_redundancy_matrix( clustered=False ) - assert np.hstack([m.data for m in redundancy_matrix]) == ( + assert np.hstack([m.values for m in redundancy_matrix]) == ( pytest.approx( np.array( [ @@ -319,7 +319,7 @@ def test_model_inspection_classifier_multi_class( association_matrix = iris_inspector_multi_class.feature_association_matrix( clustered=False ) - assert np.hstack([m.data for m in association_matrix]) == ( + assert np.hstack([m.values for m in association_matrix]) == ( pytest.approx( np.array( [ @@ -495,7 +495,7 @@ def test_model_inspection_classifier_interaction( synergy_matrix = model_inspector.feature_synergy_matrix( clustered=False, symmetrical=True ) - assert synergy_matrix.data == pytest.approx( + assert synergy_matrix.values == pytest.approx( np.array( [ [1.000, 0.011, 0.006, 0.007], @@ -508,7 +508,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_synergy_matrix( absolute=True, symmetrical=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.001, 0.002, 0.001], @@ -521,7 +521,7 @@ def test_model_inspection_classifier_interaction( ) synergy_matrix = model_inspector.feature_synergy_matrix(clustered=True) - assert synergy_matrix.data == pytest.approx( + assert synergy_matrix.values == pytest.approx( np.array( [ [1.000, 0.000, 0.001, 0.004], @@ -534,7 +534,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_synergy_matrix( absolute=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.000, 0.000, 0.001], @@ -547,7 +547,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector_full_sample.feature_synergy_matrix( clustered=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [1.000, 0.000, 0.000, 0.001], @@ -562,7 +562,7 @@ def test_model_inspection_classifier_interaction( redundancy_matrix = model_inspector.feature_redundancy_matrix( clustered=False, symmetrical=True ) - assert redundancy_matrix.data == pytest.approx( + assert redundancy_matrix.values == pytest.approx( np.array( [ [1.000, 0.080, 0.316, 0.208], @@ -575,7 +575,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_redundancy_matrix( absolute=True, symmetrical=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.316, 0.052, 0.010], @@ -588,7 +588,7 @@ def test_model_inspection_classifier_interaction( ) redundancy_matrix = model_inspector.feature_redundancy_matrix(clustered=True) - assert redundancy_matrix.data == pytest.approx( + assert redundancy_matrix.values == pytest.approx( np.array( [ [1.000, 0.691, 0.209, 0.045], @@ -601,7 +601,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_redundancy_matrix( absolute=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.294, 0.092, 0.020], @@ -615,7 +615,7 @@ def test_model_inspection_classifier_interaction( assert model_inspector_full_sample.feature_redundancy_matrix( clustered=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [1.000, 0.677, 0.384, 0.003], @@ -630,7 +630,7 
@@ def test_model_inspection_classifier_interaction( association_matrix = model_inspector.feature_association_matrix( clustered=False, symmetrical=True ) - assert association_matrix.data == pytest.approx( + assert association_matrix.values == pytest.approx( np.array( [ [1.000, 0.074, 0.309, 0.205], @@ -643,7 +643,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_association_matrix( absolute=True, symmetrical=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.317, 0.051, 0.009], @@ -656,7 +656,7 @@ def test_model_inspection_classifier_interaction( ) association_matrix = model_inspector.feature_association_matrix(clustered=True) - assert association_matrix.data == pytest.approx( + assert association_matrix.values == pytest.approx( np.array( [ [1.000, 0.694, 0.205, 0.040], @@ -669,7 +669,7 @@ def test_model_inspection_classifier_interaction( ) assert model_inspector.feature_association_matrix( absolute=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [0.425, 0.295, 0.090, 0.018], @@ -683,7 +683,7 @@ def test_model_inspection_classifier_interaction( assert model_inspector_full_sample.feature_association_matrix( clustered=True - ).data == pytest.approx( + ).values == pytest.approx( np.array( [ [1.000, 0.678, 0.383, 0.001], diff --git a/test/test/facet/test_shap_decomposition.py b/test/test/facet/test_shap_decomposition.py index 2c0734df..7af94e9e 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -32,12 +32,12 @@ def test_shap_decomposition_matrices( ): matrix_full_name = f"feature {matrix_name} matrix" n_features = len(feature_names) - assert matrix.data.shape[0] == n_features, f"rows in {matrix_full_name}" - assert matrix.data.shape[1] == n_features, f"columns in {matrix_full_name}" + assert matrix.values.shape[0] == n_features, f"rows in {matrix_full_name}" + assert matrix.values.shape[1] == n_features, f"columns in {matrix_full_name}" # check values assert ( - matrix.data.min() >= 0.0 and matrix.data.max() <= 1.0 + matrix.values.min() >= 0.0 and matrix.values.max() <= 1.0 ), f"Values of [0.0, 1.0] in {matrix_full_name}" From cf35f899d18fc562e66a4735b16b61c40b41e9f0 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 09:43:58 +0200 Subject: [PATCH 011/106] TEST: support NAN values in print_expected_matrix() helper function --- test/test/facet/test_inspection.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index bcfa99b3..5547a24b 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -761,12 +761,14 @@ def test_shap_plot_data( def print_expected_matrix(error: AssertionError, split: bool = False): - # used to print expected output for copy/paste into assertion statement + # print expected output for copy/paste into assertion statement import re matrix: List[List[float]] = eval( - re.search(r"array\(([^)]+)\)", error.args[0])[1].replace(r"\n", "\n") + re.search(r"array\(([^)]+)\)", error.args[0])[1] + .replace(r"\n", "\n") + .replace("nan", "np.nan") ) print("==== matrix assertion failed ====\nExpected Matrix:") @@ -778,7 +780,7 @@ def print_expected_matrix(error: AssertionError, split: bool = False): if split and i == halfpoint: txt += "] + [" elif i > 0: - txt += "," - txt += f"{x:.3f}" + txt += ", " + txt += "np.nan" if np.isnan(x) else f"{x:.3f}" print(txt + "],") print("]") From 
3b23ab47bb69417fd4a5c743bbbffc5211cc1f5c Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 09:45:22 +0200 Subject: [PATCH 012/106] API: set diagonals of affinity matrices to np.nan --- src/facet/inspection/_inspection.py | 6 +- .../inspection/_shap_global_explanation.py | 4 +- src/facet/inspection/_shap_projection.py | 12 +- test/test/facet/test_inspection.py | 325 +++++++++--------- test/test/facet/test_shap_decomposition.py | 2 +- 5 files changed, 179 insertions(+), 170 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index e8e9f939..48140767 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -1035,9 +1035,9 @@ def __linkage_matrix_from_affinity_matrix_for_output( # (1 = closest, 0 = most distant) # compress the distance matrix (required by SciPy) - compressed_distance_matrix: np.ndarray = squareform( - 1 - abs(feature_affinity_matrix) - ) + distance_matrix = 1.0 - abs(feature_affinity_matrix) + np.fill_diagonal(distance_matrix, 0.0) + compressed_distance_matrix: np.ndarray = squareform(distance_matrix) # calculate the linkage matrix leaf_ordering: np.ndarray = optimal_leaf_ordering( diff --git a/src/facet/inspection/_shap_global_explanation.py b/src/facet/inspection/_shap_global_explanation.py index 50acf41b..ee6903e3 100644 --- a/src/facet/inspection/_shap_global_explanation.py +++ b/src/facet/inspection/_shap_global_explanation.py @@ -119,8 +119,8 @@ def from_relative_affinity( where=affinity_abs_sym_ij_2x > 0.0, ) - # re-set the diagonal to 1.0 in case of rounding errors - fill_diagonal(affinity_rel_sym_ij, 1.0) + # affinity of a feature with itself is undefined + fill_diagonal(affinity_rel_sym_ij, np.nan) # return the AffinityMatrices object return AffinityMatrix( diff --git a/src/facet/inspection/_shap_projection.py b/src/facet/inspection/_shap_projection.py index 0f791868..beb01708 100644 --- a/src/facet/inspection/_shap_projection.py +++ b/src/facet/inspection/_shap_projection.py @@ -113,8 +113,8 @@ def _calculate_association(context: ShapContext) -> AffinityMatrix: # calculate association as the coefficient of determination for p[i] and p[j] ass_ij = cov_p_i_p_j_over_var_p_i * transpose(cov_p_i_p_j_over_var_p_i) - # we define the association of a feature with itself as 1 - fill_diagonal(ass_ij, 1.0) + # association of a feature with itself is undefined + fill_diagonal(ass_ij, np.nan) return AffinityMatrix.from_relative_affinity( affinity_rel_ij=ass_ij, std_p_i=sqrt(var_p_i) @@ -253,8 +253,8 @@ def _calculate_synergy_redundancy( # this is the coefficient of determination of the interaction vector syn_ij = cov_p_i_p_ij_over_var_p_i * cov_p_i_p_ij_over_var_p_ij - # we define the synergy of a feature with itself as 1 - fill_diagonal(syn_ij, 1.0) + # synergy of a feature with itself is undefined + fill_diagonal(syn_ij, np.nan) # # Redundancy: red[i, j] @@ -291,8 +291,8 @@ def _calculate_synergy_redundancy( # scale to accommodate variance already explained by synergy red_ij *= 1 - syn_ij - # we define the redundancy of a feature with itself as 1 - fill_diagonal(red_ij, 1.0) + # redundancy of a feature with itself is undefined + fill_diagonal(red_ij, np.nan) # # SHAP decomposition as relative contributions of diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 5547a24b..41d09613 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from 
numpy.testing import assert_allclose from pandas.testing import assert_frame_equal, assert_series_equal from sklearn.datasets import make_classification from sklearn.model_selection import KFold @@ -175,16 +176,17 @@ def test_model_inspection_classifier_binary( association_matrix = model_inspector.feature_association_matrix( clustered=True, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.692, 0.195, 0.052], - [0.692, 1.000, 0.290, 0.041], - [0.195, 0.290, 1.000, 0.081], - [0.052, 0.041, 0.081, 1.000], + [np.nan, 0.692, 0.195, 0.052], + [0.692, np.nan, 0.290, 0.041], + [0.195, 0.290, np.nan, 0.081], + [0.052, 0.041, 0.081, np.nan], ] ), - abs=0.02, + atol=0.02, ) except AssertionError as error: print_expected_matrix(error=error) @@ -260,7 +262,8 @@ def test_model_inspection_classifier_multi_class( assert feature_importance.columns.equals( pd.Index(iris_inspector_multi_class.output_names_, name="class") ) - assert feature_importance.values == pytest.approx( + assert_allclose( + feature_importance.values, np.array( [ [0.125, 0.085, 0.104], @@ -269,7 +272,7 @@ def test_model_inspection_classifier_multi_class( [0.432, 0.441, 0.425], ] ), - abs=0.02, + atol=0.02, ) # Shap decomposition matrices (feature dependencies) @@ -279,62 +282,61 @@ def test_model_inspection_classifier_multi_class( clustered=False ) - assert np.hstack([m.values for m in synergy_matrix]) == pytest.approx( + assert_allclose( + np.hstack([m.values for m in synergy_matrix]), np.array( [ - [1.000, 0.009, 0.057, 0.055, 1.000, 0.042] - + [0.418, 0.418, 1.000, 0.004, 0.085, 0.097], - [0.101, 1.000, 0.052, 0.072, 0.094, 1.000] - + [0.117, 0.156, 0.090, 1.000, 0.237, 0.258], - [0.003, 0.001, 1.000, 0.002, 0.027, 0.005] - + [1.000, 0.041, 0.012, 0.004, 1.000, 0.031], - [0.002, 0.000, 0.001, 1.000, 0.029, 0.005] - + [0.043, 1.000, 0.015, 0.005, 0.036, 1.000], + [np.nan, 0.009, 0.057, 0.055, np.nan, 0.042] + + [0.418, 0.418, np.nan, 0.004, 0.085, 0.097], + [0.101, np.nan, 0.052, 0.072, 0.094, np.nan] + + [0.117, 0.156, 0.090, np.nan, 0.237, 0.258], + [0.003, 0.001, np.nan, 0.002, 0.027, 0.005] + + [np.nan, 0.041, 0.012, 0.004, np.nan, 0.031], + [0.002, 0.000, 0.001, np.nan, 0.029, 0.005] + + [0.043, np.nan, 0.015, 0.005, 0.036, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = iris_inspector_multi_class.feature_redundancy_matrix( clustered=False ) - assert np.hstack([m.values for m in redundancy_matrix]) == ( - pytest.approx( - np.array( - [ - [1.000, 0.087, 0.643, 0.656, 1.000, 0.065] - + [0.265, 0.234, 1.000, 0.034, 0.594, 0.505], - [0.082, 1.000, 0.297, 0.292, 0.064, 1.000] - + [0.117, 0.171, 0.031, 1.000, 0.024, 0.021], - [0.682, 0.314, 1.000, 0.996, 0.471, 0.130] - + [1.000, 0.743, 0.642, 0.031, 1.000, 0.761], - [0.695, 0.315, 0.997, 1.000, 0.406, 0.194] - + [0.741, 1.000, 0.550, 0.028, 0.756, 1.000], - ] - ), - abs=0.02, - ) + assert_allclose( + np.hstack([m.values for m in redundancy_matrix]), + np.array( + [ + [np.nan, 0.087, 0.643, 0.656, np.nan, 0.065] + + [0.265, 0.234, np.nan, 0.034, 0.594, 0.505], + [0.082, np.nan, 0.297, 0.292, 0.064, np.nan] + + [0.117, 0.171, 0.031, np.nan, 0.024, 0.021], + [0.682, 0.314, np.nan, 0.996, 0.471, 0.130] + + [np.nan, 0.743, 0.642, 0.031, np.nan, 0.761], + [0.695, 0.315, 0.997, np.nan, 0.406, 0.194] + + [0.741, np.nan, 0.550, 0.028, 0.756, np.nan], + ] + ), + atol=0.02, ) association_matrix = iris_inspector_multi_class.feature_association_matrix( clustered=False ) - assert 
np.hstack([m.values for m in association_matrix]) == ( - pytest.approx( - np.array( - [ - [1.000, 0.077, 0.662, 0.670, 1.000, 0.046] - + [0.370, 0.334, 1.000, 0.031, 0.634, 0.550], - [0.077, 1.000, 0.301, 0.295, 0.046, 1.000] - + [0.127, 0.173, 0.031, 1.000, 0.025, 0.020], - [0.662, 0.301, 1.000, 0.998, 0.370, 0.127] - + [1.000, 0.783, 0.634, 0.025, 1.000, 0.790], - [0.670, 0.295, 0.998, 1.000, 0.334, 0.173] - + [0.783, 1.000, 0.550, 0.020, 0.790, 1.000], - ] - ), - abs=0.02, - ) + assert_allclose( + np.hstack([m.values for m in association_matrix]), + np.array( + [ + [np.nan, 0.077, 0.662, 0.670, np.nan, 0.046] + + [0.370, 0.334, np.nan, 0.031, 0.634, 0.550], + [0.077, np.nan, 0.301, 0.295, 0.046, np.nan] + + [0.127, 0.173, 0.031, np.nan, 0.025, 0.020], + [0.662, 0.301, np.nan, 0.998, 0.370, 0.127] + + [np.nan, 0.783, 0.634, 0.025, np.nan, 0.790], + [0.670, 0.295, 0.998, np.nan, 0.334, 0.173] + + [0.783, np.nan, 0.550, 0.020, 0.790, np.nan], + ] + ), + atol=0.02, ) except AssertionError as error: print_expected_matrix(error=error, split=True) @@ -495,204 +497,211 @@ def test_model_inspection_classifier_interaction( synergy_matrix = model_inspector.feature_synergy_matrix( clustered=False, symmetrical=True ) - assert synergy_matrix.values == pytest.approx( + assert_allclose( + synergy_matrix.values, np.array( [ - [1.000, 0.011, 0.006, 0.007], - [0.011, 1.000, 0.006, 0.007], - [0.006, 0.006, 1.000, 0.003], - [0.007, 0.007, 0.003, 1.000], + [np.nan, 0.011, 0.006, 0.007], + [0.011, np.nan, 0.006, 0.007], + [0.006, 0.006, np.nan, 0.003], + [0.007, 0.007, 0.003, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_synergy_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_synergy_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.001, 0.002, 0.001], - [0.001, 0.019, 0.000, 0.002], - [0.002, 0.000, 0.068, 0.002], - [0.001, 0.002, 0.002, 0.488], + [np.nan, 0.001, 0.002, 0.001], + [0.001, np.nan, 0.000, 0.002], + [0.002, 0.000, np.nan, 0.002], + [0.001, 0.002, 0.002, np.nan], ] ), - abs=0.02, + atol=0.02, ) synergy_matrix = model_inspector.feature_synergy_matrix(clustered=True) - assert synergy_matrix.values == pytest.approx( + assert_allclose( + synergy_matrix.values, np.array( [ - [1.000, 0.000, 0.001, 0.004], - [0.149, 1.000, 0.045, 0.157], - [0.040, 0.004, 1.000, 0.044], - [0.003, 0.001, 0.001, 1.000], + [np.nan, 0.000, 0.001, 0.004], + [0.149, np.nan, 0.045, 0.157], + [0.040, 0.004, np.nan, 0.044], + [0.003, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_synergy_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_synergy_matrix(absolute=True).values, np.array( [ - [0.425, 0.000, 0.000, 0.001], - [0.003, 0.019, 0.001, 0.003], - [0.003, 0.000, 0.068, 0.003], - [0.001, 0.000, 0.001, 0.488], + [np.nan, 0.000, 0.000, 0.001], + [0.003, np.nan, 0.001, 0.003], + [0.003, 0.000, np.nan, 0.003], + [0.001, 0.000, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_synergy_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_synergy_matrix(clustered=True).values, np.array( [ - [1.000, 0.000, 0.000, 0.001], - [0.386, 1.000, 0.108, 0.314], - [0.005, 0.002, 1.000, 0.059], - [0.002, 0.000, 0.001, 1.000], + [np.nan, 0.000, 0.000, 0.001], + [0.386, np.nan, 0.108, 0.314], + [0.005, 0.002, np.nan, 0.059], + [0.002, 
0.000, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = model_inspector.feature_redundancy_matrix( clustered=False, symmetrical=True ) - assert redundancy_matrix.values == pytest.approx( + assert_allclose( + redundancy_matrix.values, np.array( [ - [1.000, 0.080, 0.316, 0.208], - [0.080, 1.000, 0.036, 0.044], - [0.316, 0.036, 1.000, 0.691], - [0.208, 0.044, 0.691, 1.000], + [np.nan, 0.080, 0.316, 0.208], + [0.080, np.nan, 0.036, 0.044], + [0.316, 0.036, np.nan, 0.691], + [0.208, 0.044, 0.691, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_redundancy_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_redundancy_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.316, 0.052, 0.010], - [0.316, 0.488, 0.087, 0.009], - [0.052, 0.087, 0.068, 0.004], - [0.010, 0.009, 0.004, 0.019], + [np.nan, 0.316, 0.052, 0.010], + [0.316, np.nan, 0.087, 0.009], + [0.052, 0.087, np.nan, 0.004], + [0.010, 0.009, 0.004, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = model_inspector.feature_redundancy_matrix(clustered=True) - assert redundancy_matrix.values == pytest.approx( + assert_allclose( + redundancy_matrix.values, np.array( [ - [1.000, 0.691, 0.209, 0.045], - [0.692, 1.000, 0.317, 0.037], - [0.201, 0.303, 1.000, 0.081], - [0.040, 0.031, 0.076, 1.000], + [np.nan, 0.691, 0.209, 0.045], + [0.692, np.nan, 0.317, 0.037], + [0.201, 0.303, np.nan, 0.081], + [0.040, 0.031, 0.076, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_redundancy_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_redundancy_matrix(absolute=True).values, np.array( [ - [0.425, 0.294, 0.092, 0.020], - [0.337, 0.488, 0.154, 0.017], - [0.013, 0.020, 0.068, 0.006], - [0.001, 0.001, 0.001, 0.019], + [np.nan, 0.294, 0.092, 0.020], + [0.337, np.nan, 0.154, 0.017], + [0.013, 0.020, np.nan, 0.006], + [0.001, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_redundancy_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_redundancy_matrix( + clustered=True + ).values, np.array( [ - [1.000, 0.677, 0.384, 0.003], - [0.676, 1.000, 0.465, 0.000], - [0.382, 0.438, 1.000, 0.013], - [0.002, 0.000, 0.012, 1.000], + [np.nan, 0.677, 0.384, 0.003], + [0.676, np.nan, 0.465, 0.000], + [0.382, 0.438, np.nan, 0.013], + [0.002, 0.000, 0.012, np.nan], ] ), - abs=0.02, + atol=0.02, ) association_matrix = model_inspector.feature_association_matrix( clustered=False, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.074, 0.309, 0.205], - [0.074, 1.000, 0.030, 0.040], - [0.309, 0.030, 1.000, 0.694], - [0.205, 0.040, 0.694, 1.000], + [np.nan, 0.074, 0.309, 0.205], + [0.074, np.nan, 0.030, 0.040], + [0.309, 0.030, np.nan, 0.694], + [0.205, 0.040, 0.694, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_association_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_association_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.317, 0.051, 0.009], - [0.317, 0.488, 0.085, 0.007], - [0.051, 0.085, 0.068, 0.003], - [0.009, 0.007, 0.003, 0.019], + [np.nan, 0.317, 0.051, 0.009], + [0.317, np.nan, 0.085, 0.007], + [0.051, 0.085, np.nan, 0.003], + [0.009, 
0.007, 0.003, np.nan], ] ), - abs=0.02, + atol=0.02, ) association_matrix = model_inspector.feature_association_matrix(clustered=True) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.694, 0.205, 0.040], - [0.694, 1.000, 0.309, 0.030], - [0.205, 0.309, 1.000, 0.074], - [0.040, 0.030, 0.074, 1.000], + [np.nan, 0.694, 0.205, 0.040], + [0.694, np.nan, 0.309, 0.030], + [0.205, 0.309, np.nan, 0.074], + [0.040, 0.030, 0.074, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_association_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_association_matrix(absolute=True).values, np.array( [ - [0.425, 0.295, 0.090, 0.018], - [0.338, 0.488, 0.150, 0.014], - [0.013, 0.020, 0.068, 0.005], - [0.001, 0.001, 0.001, 0.019], + [np.nan, 0.295, 0.090, 0.018], + [0.338, np.nan, 0.150, 0.014], + [0.013, 0.020, np.nan, 0.005], + [0.001, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_association_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_association_matrix( + clustered=True + ).values, np.array( [ - [1.000, 0.678, 0.383, 0.001], - [0.678, 1.000, 0.447, 0.000], - [0.383, 0.447, 1.000, 0.009], - [0.001, 0.000, 0.009, 1.000], + [np.nan, 0.678, 0.383, 0.001], + [0.678, np.nan, 0.447, 0.000], + [0.383, 0.447, np.nan, 0.009], + [0.001, 0.000, 0.009, np.nan], ] ), - abs=0.02, + atol=0.02, ) except AssertionError as error: diff --git a/test/test/facet/test_shap_decomposition.py b/test/test/facet/test_shap_decomposition.py index 7af94e9e..5ec3fe89 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -37,7 +37,7 @@ def test_shap_decomposition_matrices( # check values assert ( - matrix.values.min() >= 0.0 and matrix.values.max() <= 1.0 + np.nanmin(matrix.values) >= 0.0 and np.nanmax(matrix.values) <= 1.0 ), f"Values of [0.0, 1.0] in {matrix_full_name}" From 06f87bd9685bc9e7e6533b67c2f41fd36b064445 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 09:47:03 +0200 Subject: [PATCH 013/106] API: return Matrix instances from feature_interaction_matrix() --- src/facet/inspection/_inspection.py | 44 +++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 48140767..183ce0ae 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -728,7 +728,7 @@ def feature_association_linkage(self) -> Union[LinkageTree, List[LinkageTree]]: ) ) - def feature_interaction_matrix(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: + def feature_interaction_matrix(self) -> Union[Matrix, List[Matrix]]: """ Calculate relative shap interaction values for all feature pairings. 
@@ -828,7 +828,9 @@ def feature_interaction_matrix(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: )[np.newaxis, :, :] # create a data frame from the feature matrix - return self.__feature_matrix_to_df(interaction_matrix) + return self.__feature_matrix_to_df( + interaction_matrix, value_label="relative shap interaction" + ) def shap_plot_data(self) -> ShapPlotData: """ @@ -886,8 +888,8 @@ def shap_plot_data(self) -> ShapPlotData: ) def __feature_matrix_to_df( - self, matrix: np.ndarray - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + self, matrix: np.ndarray, value_label: str + ) -> Union[Matrix, List[Matrix]]: # transform a matrix of shape (n_outputs, n_features, n_features) # to a data frame @@ -900,13 +902,21 @@ def __feature_matrix_to_df( # convert array to data frame(s) with features as row and column indices if len(matrix) == 1: - return pd.DataFrame( - data=matrix[0], index=feature_index, columns=feature_index + return self.__array_to_matrix( + matrix[0], + feature_importance=self.feature_importance(), + value_label=value_label, ) else: return [ - pd.DataFrame(data=m, index=feature_index, columns=feature_index) - for m in matrix + self.__array_to_matrix( + m, + feature_importance=feature_importance, + value_label=f"{value_label} ({output_name})", + ) + for m, (_, feature_importance), output_name in zip( + matrix, self.feature_importance().items(), self.output_names_ + ) ] def __feature_affinity_matrix( @@ -987,7 +997,8 @@ def __linkages_from_affinity_matrices( return [ self.__linkage_tree_from_affinity_matrix_for_output( - feature_affinity_for_output, feature_importance_for_output + feature_affinity_for_output, + feature_importance_for_output, ) for feature_affinity_for_output, ( _, @@ -1085,6 +1096,21 @@ def __isolate_single_frame( ) ] + @staticmethod + def __array_to_matrix( + a: np.ndarray, + *, + feature_importance: pd.Series, + value_label: str, + ) -> Matrix: + return Matrix( + a, + names=(feature_importance.index, feature_importance.index), + weights=(feature_importance, feature_importance), + value_label=value_label, + name_labels=("feature", "feature"), + ) + @staticmethod def __frame_to_matrix( frame: pd.DataFrame, From bebad34abef0783200dda3abf73c0cb368ffa493 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 11:19:06 +0200 Subject: [PATCH 014/106] REFACTOR: rename __feature_matrix_to_df to __arrays_to_matrix --- src/facet/inspection/_inspection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 183ce0ae..a8cde15e 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -828,7 +828,7 @@ def feature_interaction_matrix(self) -> Union[Matrix, List[Matrix]]: )[np.newaxis, :, :] # create a data frame from the feature matrix - return self.__feature_matrix_to_df( + return self.__arrays_to_matrix( interaction_matrix, value_label="relative shap interaction" ) @@ -887,7 +887,7 @@ def shap_plot_data(self) -> ShapPlotData: sample=sample, ) - def __feature_matrix_to_df( + def __arrays_to_matrix( self, matrix: np.ndarray, value_label: str ) -> Union[Matrix, List[Matrix]]: # transform a matrix of shape (n_outputs, n_features, n_features) From c8f048851baea001afe51a29b9b3dbadfcc67409 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 11:40:13 +0200 Subject: [PATCH 015/106] REFACTOR: import scipy modules instead of individual functions --- src/facet/inspection/_inspection.py | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index a8cde15e..81ba2588 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -7,8 +7,8 @@ import numpy as np import pandas as pd -from scipy.cluster.hierarchy import leaves_list, linkage, optimal_leaf_ordering -from scipy.spatial.distance import squareform +from scipy.cluster import hierarchy +from scipy.spatial import distance from pytools.api import AllTracker, inheritdoc from pytools.data import LinkageTree, Matrix @@ -945,7 +945,7 @@ def __sort_affinity_matrices( return [ (lambda feature_order: affinity_matrix.iloc[feature_order, feature_order])( - feature_order=leaves_list( + feature_order=hierarchy.leaves_list( Z=fn_linkage(feature_affinity_matrix=symmetrical_affinity_matrix) ) ) @@ -1048,11 +1048,11 @@ def __linkage_matrix_from_affinity_matrix_for_output( # compress the distance matrix (required by SciPy) distance_matrix = 1.0 - abs(feature_affinity_matrix) np.fill_diagonal(distance_matrix, 0.0) - compressed_distance_matrix: np.ndarray = squareform(distance_matrix) + compressed_distance_matrix: np.ndarray = distance.squareform(distance_matrix) # calculate the linkage matrix - leaf_ordering: np.ndarray = optimal_leaf_ordering( - Z=linkage(y=compressed_distance_matrix, method="single"), + leaf_ordering: np.ndarray = hierarchy.optimal_leaf_ordering( + Z=hierarchy.linkage(y=compressed_distance_matrix, method="single"), y=compressed_distance_matrix, ) From 4e04a9ed69760df2bac850ee5a1f50aa78eb6613 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 12:42:19 +0200 Subject: [PATCH 016/106] DOC: update release notes --- RELEASE_NOTES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 5f1c8fa7..c3cb58d9 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -8,6 +8,8 @@ FACET 2.0 ~~~~~ - API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances +- API: diagonals of feature synergy, redundancy, and association matrices are now + ``nan`` instead of 1.0. - API: the leaf order of :class:`.LinkageTree` objects generated by `feature_…_linkage` methods of :class:`.LearnerInspector` is now the same as the row and column order of :class:`.Matrix` objects returned by the corresponding From df7d7c7eb2b71e71c73c4c2ef85df999ca20dd79 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 12:42:42 +0200 Subject: [PATCH 017/106] DOC: correct typos and glitches in release notes --- RELEASE_NOTES.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c3cb58d9..b1e2691b 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -11,9 +11,9 @@ FACET 2.0 - API: diagonals of feature synergy, redundancy, and association matrices are now ``nan`` instead of 1.0. - API: the leaf order of :class:`.LinkageTree` objects generated by - `feature_…_linkage` methods of :class:`.LearnerInspector` is now the same as the + ``feature_…_linkage`` methods of :class:`.LearnerInspector` is now the same as the row and column order of :class:`.Matrix` objects returned by the corresponding - `feature_…_matrix` methods of :class:`.LearnerInspector`, minimizing the distance + ``feature_…_matrix`` methods of :class:`.LearnerInspector`, minimizing the distance between adjacent leaves. 
The old sorting behaviour of FACET 1.x can be restored using method :meth:`.LinkageTree.sort_by_weight` @@ -42,7 +42,7 @@ the baseline of a simulation. ~~~~~ - BUILD: added support for *sklearndf* 1.2 and *scikit-learn* 0.24 -- API: new optional parameter `subsample` in method +- API: new optional parameter ``subsample`` in method :meth:`.BaseUnivariateSimulator.simulate_feature` can be used to specify a subsample to be used in the simulation (but simulating using a crossfit based on the full sample) @@ -65,7 +65,7 @@ by the :class:`.LearnerInspector`. ~~~~~ - API: SHAP interaction vectors can (in part) also be influenced by redundancy among - features. This can inflate quantificatios of synergy, especially in cases where two + features. This can inflate quantifications of synergy, especially in cases where two variables are highly redundant. FACET now corrects interaction vectors for redundancy prior to calculating synergy. Technically we ensure that each interaction vector is orthogonal w.r.t the main effect vectors of both associated features. @@ -86,7 +86,8 @@ FACET 1.0 1.0.3 ~~~~~ -- FIX: restrict package requirements to *gamma-pytools* 1.0.* and *sklearndf* 1.0.x, since FACET 1.0 is not compatible with *gamma-pytools* 1.1.* +- FIX: restrict package requirements to *gamma-pytools* 1.0.* and *sklearndf* 1.0.x, + since FACET 1.0 is not compatible with *gamma-pytools* 1.1.* 1.0.2 ~~~~~ From 38484830231d5ca3c90f69cc9a3fdac3efb862e2 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 6 Oct 2021 13:20:21 +0200 Subject: [PATCH 018/106] DOC: tweak release notes --- RELEASE_NOTES.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index b1e2691b..20652e52 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -9,12 +9,12 @@ FACET 2.0 - API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances - API: diagonals of feature synergy, redundancy, and association matrices are now - ``nan`` instead of 1.0. + ``nan`` instead of 1.0 - API: the leaf order of :class:`.LinkageTree` objects generated by ``feature_…_linkage`` methods of :class:`.LearnerInspector` is now the same as the row and column order of :class:`.Matrix` objects returned by the corresponding ``feature_…_matrix`` methods of :class:`.LearnerInspector`, minimizing the distance - between adjacent leaves. 
+ between adjacent leaves The old sorting behaviour of FACET 1.x can be restored using method :meth:`.LinkageTree.sort_by_weight` - VIZ: minor tweaks to simulation plots and reports generated by From b782d7de959443ba2d4788b4d3157b3eca0000eb Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Sun, 17 Oct 2021 14:16:06 +0200 Subject: [PATCH 019/106] API: run LearnerInspector on full sample instead of crossfit splits --- src/facet/inspection/_inspection.py | 146 ++---- src/facet/inspection/_shap.py | 261 +++-------- .../inspection/_shap_global_explanation.py | 31 +- src/facet/inspection/_shap_projection.py | 53 +-- test/test/conftest.py | 15 +- test/test/facet/test_inspection.py | 434 +++++++----------- 6 files changed, 315 insertions(+), 625 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 81ba2588..170b6016 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -17,7 +17,6 @@ from sklearndf import ClassifierDF, LearnerDF, RegressorDF from sklearndf.pipeline import LearnerPipelineDF -from ..crossfit import LearnerCrossfit from ..data import Sample from ._explainer import ExplainerFactory, TreeExplainerFactory from ._shap import ( @@ -104,7 +103,7 @@ def target(self) -> Union[pd.Series, pd.DataFrame]: @inheritdoc(match="[see superclass]") class LearnerInspector( - FittableMixin[LearnerCrossfit], ParallelizableMixin, Generic[T_LearnerPipelineDF] + FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF] ): """ Explain regressors and classifiers based on SHAP values. @@ -159,6 +158,7 @@ class LearnerInspector( def __init__( self, *, + pipeline: T_LearnerPipelineDF, explainer_factory: Optional[ExplainerFactory] = None, shap_interaction: bool = True, n_jobs: Optional[int] = None, @@ -167,6 +167,7 @@ def __init__( verbose: Optional[int] = None, ) -> None: """ + :param pipeline: the learner pipeline to inspect :param explainer_factory: optional function that creates a shap Explainer (default: ``TreeExplainerFactory``) :param shap_interaction: if ``True``, calculate SHAP interaction values, else @@ -182,6 +183,15 @@ def __init__( verbose=verbose, ) + if not pipeline.is_fitted: + raise ValueError("arg pipeline must be fitted") + + if not isinstance(pipeline.final_estimator, (ClassifierDF, RegressorDF)): + raise TypeError( + "learner in arg pipeline must be a classifier or a regressor," + f"but is a {type(pipeline.final_estimator).__name__}" + ) + if explainer_factory: if not explainer_factory.explains_raw_output: raise ValueError( @@ -200,73 +210,63 @@ def __init__( ) shap_interaction = False - self._explainer_factory = explainer_factory - self._shap_interaction = shap_interaction + self.pipeline = pipeline + self.explainer_factory = explainer_factory + self.shap_interaction = shap_interaction - self._crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None self._shap_calculator: Optional[ShapCalculator] = None self._shap_global_decomposer: Optional[ShapGlobalExplainer] = None self._shap_global_projector: Optional[ShapGlobalExplainer] = None + self._sample: Optional[Sample] = None __init__.__doc__ += ParallelizableMixin.__init__.__doc__ - def fit(self: T_Self, crossfit: LearnerCrossfit, **fit_params: Any) -> T_Self: + def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: """ Fit the inspector with the given crossfit. This will calculate SHAP values and, if enabled in the underlying SHAP explainer, also SHAP interaction values. 
- :param crossfit: the model crossfit to be explained by this model inspector + :param sample: the background sample to be used for the global explanation + of this model :param fit_params: additional keyword arguments (ignored; accepted for compatibility with :class:`.FittableMixin`) :return: ``self`` """ - # :param full_sample: if ``True``, explain only a single model fitted on the - # full sample; otherwise, explain all models in the crossfit and aggregate - # results - full_sample = bool(fit_params.get("full_sample", False)) self: LearnerInspector # support type hinting in PyCharm - if not crossfit.is_fitted: - raise ValueError("crossfit in arg pipeline is not fitted") - - learner: LearnerDF = crossfit.pipeline.final_estimator + learner: LearnerDF = self.pipeline.final_estimator if isinstance(learner, ClassifierDF): - if isinstance(crossfit.sample_.target_name, list): + if isinstance(sample.target_name, list): raise ValueError( "only single-output classifiers (binary or multi-class) are " "supported, but the classifier in the given crossfit has been " "fitted on multiple columns " - f"{crossfit.sample_.target_name}" + f"{sample.target_name}" ) is_classifier = True - elif isinstance(learner, RegressorDF): - is_classifier = False - else: - raise TypeError( - "learner in given crossfit must be a classifier or a regressor," - f"but is a {type(learner).__name__}" - ) + assert isinstance(learner, RegressorDF) + is_classifier = False shap_global_projector: Union[ ShapVectorProjector, ShapInteractionVectorProjector, None ] - if self._shap_interaction: + if self.shap_interaction: shap_calculator_type = ( ClassifierShapInteractionValuesCalculator if is_classifier else RegressorShapInteractionValuesCalculator ) shap_calculator = shap_calculator_type( - explain_full_sample=full_sample, - explainer_factory=self._explainer_factory, + pipeline=self.pipeline, + explainer_factory=self.explainer_factory, n_jobs=self.n_jobs, shared_memory=self.shared_memory, pre_dispatch=self.pre_dispatch, @@ -282,8 +282,8 @@ def fit(self: T_Self, crossfit: LearnerCrossfit, **fit_params: Any) -> T_Self: else RegressorShapValuesCalculator ) shap_calculator = shap_calculator_type( - explain_full_sample=full_sample, - explainer_factory=self._explainer_factory, + pipeline=self.pipeline, + explainer_factory=self.explainer_factory, n_jobs=self.n_jobs, shared_memory=self.shared_memory, pre_dispatch=self.pre_dispatch, @@ -292,14 +292,13 @@ def fit(self: T_Self, crossfit: LearnerCrossfit, **fit_params: Any) -> T_Self: shap_global_projector = ShapVectorProjector() - shap_calculator.fit(crossfit=crossfit) + shap_calculator.fit(sample) shap_global_projector.fit(shap_calculator=shap_calculator) + self._sample = sample self._shap_calculator = shap_calculator self._shap_global_projector = shap_global_projector - self._crossfit = crossfit - return self @property @@ -309,23 +308,15 @@ def _shap_global_explainer(self) -> ShapGlobalExplainer: @property def is_fitted(self) -> bool: """[see superclass]""" - return self._crossfit is not None - - @property - def crossfit_(self) -> LearnerCrossfit[T_LearnerPipelineDF]: - """ - The crossfit with which this inspector was fitted. - """ - self._ensure_fitted() - return self._crossfit + return self._sample is not None @property def sample_(self) -> Sample: """ - The training sample of the crossfit with which this inspector was fitted. + The background sample used to fit this inspector. 
""" self._ensure_fitted() - return self._crossfit.sample_ + return self._sample @property def output_names_(self) -> List[str]: @@ -352,44 +343,19 @@ def features_(self) -> List[str]: """ return self.crossfit_.pipeline.feature_names_out_.to_list() - def shap_values( - self, aggregation: Optional[str] = AGG_MEAN - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + def shap_values(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: """ Calculate the SHAP values for all observations and features. Returns a data frame of SHAP values where each row corresponds to an observation, and each column corresponds to a feature. - By default, one SHAP value is returned for each observation and feature; this - value is calculated as the mean SHAP value across all crossfits. - - The ``aggregation`` argument can be used to disable or change the aggregation - of SHAP values: - - - passing ``aggregation=None`` will disable SHAP value aggregation, - generating one row for every crossfit and observation (identified by - a hierarchical index with two levels) - - passing ``aggregation="mean"`` (the default) will calculate the mean SHAP - values across all crossfits - - passing ``aggregation="std"`` will calculate the standard deviation of SHAP - values across all crossfits, as the basis for determining the uncertainty - of SHAP calculations - - :param aggregation: aggregation SHAP values across splits; - permissible values are ``"mean"`` (calculate the mean), ``"std"`` - (calculate the standard deviation), or ``None`` to prevent aggregation - (default: ``"mean"``) :return: a data frame with SHAP values """ self._ensure_fitted() - return self.__split_multi_output_df( - self._shap_calculator.get_shap_values(aggregation=aggregation) - ) + return self.__split_multi_output_df(self._shap_calculator.get_shap_values()) - def shap_interaction_values( - self, aggregation: Optional[str] = AGG_MEAN - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + def shap_interaction_values(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: """ Calculate the SHAP interaction values for all observations and pairs of features. @@ -398,33 +364,11 @@ def shap_interaction_values( observation and a feature (identified by a hierarchical index with two levels), and each column corresponds to a feature. - By default, one SHAP interaction value is returned for each observation and - feature pairing; this value is calculated as the mean SHAP interaction value - across all crossfits. 
- - The ``aggregation`` argument can be used to disable or change the aggregation - of SHAP interaction values: - - - passing ``aggregation=None`` will disable SHAP interaction value - aggregation, generating one row for every crossfit, observation and - feature (identified by a hierarchical index with three levels) - - passing ``aggregation="mean"`` (the default) will calculate the mean SHAP - interaction values across all crossfits - - passing ``aggregation="std"`` will calculate the standard deviation of SHAP - interaction values across all crossfits, as the basis for determining the - uncertainty of SHAP calculations - - :param aggregation: aggregate SHAP interaction values across splits; - permissible values are ``"mean"`` (calculate the mean), ``"std"`` - (calculate the standard deviation), or ``None`` to prevent aggregation - (default: ``"mean"``) :return: a data frame with SHAP interaction values """ self._ensure_fitted() return self.__split_multi_output_df( - self.__shap_interaction_values_calculator.get_shap_interaction_values( - aggregation=aggregation - ) + self.__shap_interaction_values_calculator.get_shap_interaction_values() ) def feature_importance( @@ -451,9 +395,7 @@ def feature_importance( if method not in methods: raise ValueError(f'arg method="{method}" must be one of {methods}') - shap_matrix: pd.DataFrame = self._shap_calculator.get_shap_values( - aggregation="mean" - ) + shap_matrix: pd.DataFrame = self._shap_calculator.get_shap_values() weight: Optional[pd.Series] = self.sample_.weight abs_importance: pd.Series @@ -788,7 +730,7 @@ def feature_interaction_matrix(self) -> Union[Matrix, List[Matrix]]: # (n_observations, n_outputs, n_features, n_features) # where the innermost feature x feature arrays are symmetrical im_matrix_per_observation_and_output = ( - self.shap_interaction_values(aggregation=None) + self.shap_interaction_values() .values.reshape((-1, n_features, n_outputs, n_features)) .swapaxes(1, 2) ) @@ -863,9 +805,7 @@ def shap_plot_data(self) -> ShapPlotData: :return: consolidated SHAP and feature values for use shap plots """ - shap_values: Union[pd.DataFrame, List[pd.DataFrame]] = self.shap_values( - aggregation="mean" - ) + shap_values: Union[pd.DataFrame, List[pd.DataFrame]] = self.shap_values() output_names: List[str] = self.output_names_ shap_values_numpy: Union[np.ndarray, List[np.ndarray]] @@ -880,7 +820,7 @@ def shap_plot_data(self) -> ShapPlotData: shap_values_numpy = shap_values.values included_observations = shap_values.index - sample: Sample = self.crossfit_.sample_.subsample(loc=included_observations) + sample: Sample = self.sample_.subsample(loc=included_observations) return ShapPlotData( shap_values=shap_values_numpy, @@ -1062,7 +1002,7 @@ def __linkage_matrix_from_affinity_matrix_for_output( return leaf_ordering def _ensure_shap_interaction(self) -> None: - if not self._shap_interaction: + if not self.shap_interaction: raise RuntimeError( "SHAP interaction values have not been calculated. 
" "Create an inspector with parameter 'shap_interaction=True' to " diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 5f7c5b89..2c1a09dc 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -11,14 +11,13 @@ from pytools.api import AllTracker, inheritdoc from pytools.fit import FittableMixin -from pytools.parallelization import Job, JobRunner, ParallelizableMixin +from pytools.parallelization import ParallelizableMixin from sklearndf.pipeline import ( ClassifierPipelineDF, LearnerPipelineDF, RegressorPipelineDF, ) -from ..crossfit import LearnerCrossfit from ..data import Sample from ._explainer import BaseExplainer, ExplainerFactory @@ -66,7 +65,7 @@ @inheritdoc(match="[see superclass]") class ShapCalculator( - FittableMixin[LearnerCrossfit[T_LearnerPipelineDF]], + FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF], metaclass=ABCMeta, @@ -92,44 +91,37 @@ class ShapCalculator( def __init__( self, + pipeline: T_LearnerPipelineDF, explainer_factory: ExplainerFactory, *, - explain_full_sample: bool, n_jobs: Optional[int] = None, shared_memory: Optional[bool] = None, pre_dispatch: Optional[Union[str, int]] = None, verbose: Optional[int] = None, ) -> None: - """ - :param explain_full_sample: if ``True``, calculate SHAP values for full sample, - otherwise only use OOB sample for each crossfit - """ super().__init__( n_jobs=n_jobs, shared_memory=shared_memory, pre_dispatch=pre_dispatch, verbose=verbose, ) - self.explain_full_sample = explain_full_sample + self.pipeline = pipeline self._explainer_factory = explainer_factory self.shap_: Optional[pd.DataFrame] = None self.feature_index_: Optional[pd.Index] = None self.output_names_: Optional[List[str]] = None self.sample_: Optional[Sample] = None - self.n_splits_: Optional[int] = None @property def is_fitted(self) -> bool: """[see superclass]""" return self.shap_ is not None - def fit( - self: T_Self, crossfit: LearnerCrossfit[T_LearnerPipelineDF], **fit_params - ) -> T_Self: + def fit(self: T_Self, sample: Sample, **fit_params) -> T_Self: """ Calculate the SHAP values. 
- :param crossfit: the learner crossfit for which to calculate SHAP values + :param sample: the observations for which to calculate SHAP values :param fit_params: additional fit parameters (unused) :return: self """ @@ -140,50 +132,49 @@ def fit( # reset fit in case we get an exception along the way self.shap_ = None - training_sample = crossfit.sample_ - self.feature_index_ = crossfit.pipeline.feature_names_out_.rename( + self.feature_index_ = self.pipeline.feature_names_out_.rename( Sample.IDX_FEATURE ) - self.output_names_ = self._get_output_names(crossfit=crossfit) - self.sample_ = training_sample + self.output_names_ = self._get_output_names(sample) + self.sample_ = sample # calculate shap values and re-order the observation index to match the # sequence in the original training sample - shap_all_splits_df: pd.DataFrame = self._get_shap_all_splits(crossfit=crossfit) - - assert 2 <= shap_all_splits_df.index.nlevels <= 3 - assert shap_all_splits_df.index.names[1] == training_sample.index.name - - self.shap_ = shap_all_splits_df.reindex( - index=training_sample.index.intersection( - cast(pd.MultiIndex, shap_all_splits_df.index).levels[1], sort=False + shap_df: pd.DataFrame = self._get_shap(sample) + + n_levels = shap_df.index.nlevels + assert 1 <= n_levels <= 2 + assert shap_df.index.names[0] == sample.index.name + + self.shap_ = shap_df.reindex( + index=sample.index.intersection( + ( + shap_df.index + if n_levels == 1 + else cast(pd.MultiIndex, shap_df.index).levels[0] + ), + sort=False, ), - level=1, + level=0, copy=False, ) - self.n_splits_ = 1 if self.explain_full_sample else crossfit.n_splits_ - return self @abstractmethod - def get_shap_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_values(self) -> pd.DataFrame: """ - The resulting aggregated shap values as a data frame, - aggregated to averaged SHAP contributions per feature and observation. + The resulting shap values, per observation and feature, as a data frame. - :param aggregation: aggregation method, or ``None`` for no aggregation :return: SHAP contribution values with shape (n_observations, n_outputs * n_features) """ @abstractmethod - def get_shap_interaction_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_interaction_values(self) -> pd.DataFrame: """ - The resulting aggregated shap interaction values as a data frame, - aggregated to averaged SHAP interaction values per observation. + Get the resulting shap interaction values as a data frame. 
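        Each row corresponds to a combination of observation and feature,
        identified by a hierarchical index with two levels; each column
        corresponds to a feature, repeated per output for multi-output models.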
- :param aggregation: aggregation method, or ``None`` for no aggregation :return: SHAP contribution values with shape (n_observations * n_features, n_outputs * n_features) :raise TypeError: this SHAP calculator does not support interaction values @@ -202,12 +193,9 @@ def _get_multi_output_names( ) -> List[str]: pass - def _get_shap_all_splits( - self, crossfit: LearnerCrossfit[T_LearnerPipelineDF] - ) -> pd.DataFrame: - crossfit: LearnerCrossfit[LearnerPipelineDF] + def _get_shap(self, sample: Sample) -> pd.DataFrame: - sample = crossfit.sample_ + pipeline = self.pipeline # prepare the background dataset @@ -215,7 +203,6 @@ def _get_shap_all_splits( if self._explainer_factory.uses_background_dataset: background_dataset = sample.features - pipeline = crossfit.pipeline if pipeline.preprocessing: background_dataset = pipeline.preprocessing.transform( X=background_dataset @@ -253,89 +240,29 @@ def _make_explainer(_model: T_LearnerPipelineDF) -> BaseExplainer: ), ) - shap_df_per_split: List[pd.DataFrame] - - if self.explain_full_sample: - # we explain the full sample using the model fitted on the full sample - # so the result is a list with a single data frame of shap values - model = crossfit.pipeline - shap_df_per_split = [ - self._get_shap_for_split( - model=model, - sample=sample, - explainer=_make_explainer(model), - features_out=self.feature_index_, - shap_matrix_for_split_to_df_fn=self._convert_raw_shap_to_df, - multi_output_type=self.get_multi_output_type(), - multi_output_names=self._get_multi_output_names( - model=model, sample=sample - ), - ) - ] - - else: - shap_df_per_split = JobRunner.from_parallelizable(self).run_jobs( - Job.delayed(self._get_shap_for_split)( - model, - sample, - _make_explainer(model), - self.feature_index_, - self._convert_raw_shap_to_df, - self.get_multi_output_type(), - self._get_multi_output_names(model=model, sample=sample), - ) - for model, sample in zip( - crossfit.models(), - ( - sample.subsample(iloc=oob_split) - for _, oob_split in crossfit.splits() - ), - ) - ) - - return self._concatenate_splits(shap_df_per_split=shap_df_per_split) - - @abstractmethod - def _concatenate_splits( - self, shap_df_per_split: List[pd.DataFrame] - ) -> pd.DataFrame: - pass - - @staticmethod - def _aggregate_splits( - shap_all_splits_df: pd.DataFrame, method: Optional[str] - ) -> pd.DataFrame: - # Group SHAP values by observation ID, aggregate SHAP values using mean or std, - # then restore the original order of observations - - if method is None: - return shap_all_splits_df - - index = shap_all_splits_df.index - n_levels = index.nlevels - - assert n_levels > 1 - assert index.names[0] == ShapCalculator.IDX_SPLIT - - level = 1 if n_levels == 2 else tuple(range(1, n_levels)) - - if method == ShapCalculator.AGG_MEAN: - shap_aggregated = shap_all_splits_df.groupby(level=level).mean() - elif method == ShapCalculator.AGG_STD: - shap_aggregated = shap_all_splits_df.groupby(level=level).std() - else: - raise ValueError(f"unknown aggregation method: {method}") - - return shap_aggregated + # we explain the full sample using the model fitted on the full sample + # so the result is a list with a single data frame of shap values + return self._calculate_shap( + model=pipeline, + sample=sample, + explainer=_make_explainer(pipeline), + features_out=self.feature_index_, + shap_matrix_to_df_fn=self._convert_raw_shap_to_df, + multi_output_type=self.get_multi_output_type(), + multi_output_names=self._get_multi_output_names( + model=pipeline, sample=sample + ), + ) @staticmethod 
@abstractmethod - def _get_shap_for_split( + def _calculate_shap( + *, model: LearnerPipelineDF, sample: Sample, explainer: BaseExplainer, features_out: pd.Index, - shap_matrix_for_split_to_df_fn: ShapToDataFrameFunction, + shap_matrix_to_df_fn: ShapToDataFrameFunction, multi_output_type: str, multi_output_names: Sequence[str], ) -> pd.DataFrame: @@ -418,9 +345,8 @@ def _convert_raw_shap_to_df( """ pass - @staticmethod @abstractmethod - def _get_output_names(crossfit: LearnerCrossfit[T_LearnerPipelineDF]) -> List[str]: + def _get_output_names(self, sample: Sample) -> List[str]: pass @@ -432,20 +358,17 @@ class ShapValuesCalculator( Base class for calculating SHAP contribution values. """ - def get_shap_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_values(self) -> pd.DataFrame: """[see superclass]""" self._ensure_fitted() - return ShapCalculator._aggregate_splits( - shap_all_splits_df=self.shap_, method=aggregation - ) + return self.shap_ - def get_shap_interaction_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_interaction_values(self) -> pd.DataFrame: """ Not implemented. - :param aggregation: (ignored) - :return: (never returns) - :raise TypeError: always raises this - SHAP interaction values are not supported + :return: (never returns anything) + :raise TypeError: SHAP interaction values are not supported - always raised """ raise TypeError( f"{type(self).__name__}" @@ -454,12 +377,13 @@ def get_shap_interaction_values(self, aggregation: Optional[str]) -> pd.DataFram ) @staticmethod - def _get_shap_for_split( + def _calculate_shap( + *, model: LearnerPipelineDF, sample: Sample, explainer: BaseExplainer, features_out: pd.Index, - shap_matrix_for_split_to_df_fn: ShapToDataFrameFunction, + shap_matrix_to_df_fn: ShapToDataFrameFunction, multi_output_type: str, multi_output_names: Sequence[str], ) -> pd.DataFrame: @@ -483,7 +407,7 @@ def _get_shap_for_split( # shap_matrix_for_split_to_df_fn) shap_values_df_per_output: List[pd.DataFrame] = [ shap.reindex(columns=features_out, copy=False, fill_value=0.0) - for shap in shap_matrix_for_split_to_df_fn(shap_values, x.index, x.columns) + for shap in shap_matrix_to_df_fn(shap_values, x.index, x.columns) ] # if we have a single output, return the data frame for that output; @@ -508,20 +432,15 @@ class ShapInteractionValuesCalculator( Base class for calculating SHAP interaction values. 
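    Interaction values attribute each prediction to pairs of features; summing
    them over one of the two feature dimensions recovers the per-feature SHAP
    values.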
""" - def get_shap_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_values(self) -> pd.DataFrame: """[see superclass]""" self._ensure_fitted() - return ShapCalculator._aggregate_splits( - shap_all_splits_df=self.shap_.groupby(level=(0, 1)).sum(), - method=aggregation, - ) + return self.shap_.groupby(level=0).sum() - def get_shap_interaction_values(self, aggregation: Optional[str]) -> pd.DataFrame: + def get_shap_interaction_values(self) -> pd.DataFrame: """[see superclass]""" self._ensure_fitted() - return ShapCalculator._aggregate_splits( - shap_all_splits_df=self.shap_, method=aggregation - ) + return self.shap_ def get_diagonals(self) -> pd.DataFrame: """ @@ -554,12 +473,13 @@ def get_diagonals(self) -> pd.DataFrame: ) @staticmethod - def _get_shap_for_split( + def _calculate_shap( + *, model: LearnerPipelineDF, sample: Sample, explainer: BaseExplainer, features_out: pd.Index, - shap_matrix_for_split_to_df_fn: ShapToDataFrameFunction, + shap_matrix_to_df_fn: ShapToDataFrameFunction, multi_output_type: str, multi_output_names: Sequence[str], ) -> pd.DataFrame: @@ -593,9 +513,7 @@ def _get_shap_for_split( copy=False, fill_value=0.0, ) - for im in shap_matrix_for_split_to_df_fn( - shap_interaction_tensors, x.index, x.columns - ) + for im in shap_matrix_to_df_fn(shap_interaction_tensors, x.index, x.columns) ] # if we have a single output, use the data frame for that output; @@ -618,10 +536,9 @@ class RegressorShapCalculator(ShapCalculator[RegressorPipelineDF], metaclass=ABC Calculates SHAP (interaction) values for regression models. """ - @staticmethod - def _get_output_names(crossfit: LearnerCrossfit[RegressorPipelineDF]) -> List[str]: + def _get_output_names(self, sample: Sample) -> List[str]: # noinspection PyProtectedMember - return crossfit.sample_._target_names + return sample._target_names @staticmethod def get_multi_output_type() -> str: @@ -634,15 +551,6 @@ def _get_multi_output_names( # noinspection PyProtectedMember return sample._target_names - def _concatenate_splits( - self, shap_df_per_split: List[pd.DataFrame] - ) -> pd.DataFrame: - return pd.concat( - shap_df_per_split, - keys=range(len(shap_df_per_split)), - names=[ShapCalculator.IDX_SPLIT], - ) - class RegressorShapValuesCalculator( RegressorShapCalculator, ShapValuesCalculator[RegressorPipelineDF] @@ -703,14 +611,14 @@ class ClassifierShapCalculator(ShapCalculator[ClassifierPipelineDF], metaclass=A COL_CLASS = "class" - @staticmethod def _get_output_names( - crossfit: LearnerCrossfit[ClassifierPipelineDF], + self, + sample: Sample, ) -> Sequence[str]: assert not isinstance( - crossfit.sample_.target_name, list + sample.target_name, list ), "classification model is single-output" - classifier_df = crossfit.pipeline.final_estimator + classifier_df = self.pipeline.final_estimator assert classifier_df.is_fitted, "classifier used in crossfit must be fitted" try: @@ -754,35 +662,6 @@ def _get_multi_output_names( # noinspection PyUnresolvedReferences return [str(class_) for class_ in root_classifier.classes_] - def _concatenate_splits( - self, shap_df_per_split: List[pd.DataFrame] - ) -> pd.DataFrame: - output_names = self.output_names_ - - split_keys = range(len(shap_df_per_split)) - if len(output_names) == 1: - return pd.concat( - shap_df_per_split, keys=split_keys, names=[ShapCalculator.IDX_SPLIT] - ) - - else: - # for multi-class classifiers, ensure that all data frames include - # columns for all classes (even if a class was missing in any split) - - columns = pd.MultiIndex.from_product( - 
iterables=[output_names, self.feature_index_], - names=[self.get_multi_output_type(), self.feature_index_.name], - ) - - return pd.concat( - [ - shap_df.reindex(columns=columns, fill_value=0.0) - for shap_df in shap_df_per_split - ], - keys=split_keys, - names=[ShapCalculator.IDX_SPLIT], - ) - class ClassifierShapValuesCalculator( ClassifierShapCalculator, ShapValuesCalculator[ClassifierPipelineDF] diff --git a/src/facet/inspection/_shap_global_explanation.py b/src/facet/inspection/_shap_global_explanation.py index ee6903e3..4fecced7 100644 --- a/src/facet/inspection/_shap_global_explanation.py +++ b/src/facet/inspection/_shap_global_explanation.py @@ -5,7 +5,7 @@ """ import logging from abc import ABCMeta, abstractmethod -from typing import Any, Iterable, List, Optional, TypeVar, Union +from typing import Any, List, Optional, TypeVar, Union import numpy as np import pandas as pd @@ -134,23 +134,6 @@ def from_relative_affinity( ).reshape((2, 2, *affinity_rel_ij.shape)) ) - @staticmethod - def aggregate(affinity_matrices: Iterable["AffinityMatrix"]) -> "AffinityMatrix": - """ - Aggregate several sets of affinity matrices (obtained from different splits) - into one, by calculating the mean and standard deviation for each value in the - provided iterable of affinity matrices. - - :param affinity_matrices: sets of affinity matrices to aggregate - :return: the aggregated set of affinity matrices - """ - matrix_values = np.stack( - tuple(affinity_matrix._matrices for affinity_matrix in affinity_matrices) - ) - return AffinityMatrix( - matrices=matrix_values.mean(axis=0), matrices_std=matrix_values.std(axis=0) - ) - def get_values( self, symmetrical: bool, absolute: bool, std: bool ) -> Optional[np.ndarray]: @@ -544,10 +527,8 @@ class ShapValueContext(ShapContext): Contextual data for global SHAP calculations based on SHAP values. """ - def __init__(self, shap_calculator: ShapCalculator, split_id: int) -> None: - shap_values: pd.DataFrame = shap_calculator.get_shap_values( - aggregation=None - ).xs(split_id, level=0) + def __init__(self, shap_calculator: ShapCalculator) -> None: + shap_values: pd.DataFrame = shap_calculator.get_shap_values() def _p_i() -> np.ndarray: n_outputs: int = len(shap_calculator.output_names_) @@ -583,10 +564,8 @@ class ShapInteractionValueContext(ShapContext): Contextual data for global SHAP calculations based on SHAP interaction values. 
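    The feature vectors are derived from SHAP interaction values rather than
    plain SHAP values, providing the basis for decomposing feature relationships
    into synergy and redundancy.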
""" - def __init__(self, shap_calculator: ShapCalculator, split_id: int) -> None: - shap_values: pd.DataFrame = shap_calculator.get_shap_interaction_values( - aggregation=None - ).xs(split_id, level=0) + def __init__(self, shap_calculator: ShapCalculator) -> None: + shap_values: pd.DataFrame = shap_calculator.get_shap_interaction_values() n_features: int = len(shap_calculator.feature_index_) n_outputs: int = len(shap_calculator.output_names_) diff --git a/src/facet/inspection/_shap_projection.py b/src/facet/inspection/_shap_projection.py index beb01708..b75c6eec 100644 --- a/src/facet/inspection/_shap_projection.py +++ b/src/facet/inspection/_shap_projection.py @@ -5,7 +5,7 @@ """ import logging from abc import ABCMeta, abstractmethod -from typing import Iterable, List, Optional, Tuple, TypeVar +from typing import Optional, Tuple, TypeVar import numpy as np @@ -84,11 +84,11 @@ def _reset_fit(self) -> None: self.association_ = None @abstractmethod - def _get_context(self, shap_calculator: ShapCalculator) -> List[ShapContext]: + def _get_context(self, shap_calculator: ShapCalculator) -> ShapContext: pass @abstractmethod - def _calculate(self, contexts: Iterable[ShapContext]) -> AffinityMatrix: + def _calculate(self, context: ShapContext) -> AffinityMatrix: pass @staticmethod @@ -129,17 +129,12 @@ class ShapVectorProjector(ShapProjector): onto a feature's main SHAP vector. """ - def _get_context(self, shap_calculator: ShapCalculator) -> List[ShapContext]: - return [ - ShapValueContext(shap_calculator=shap_calculator, split_id=split_id) - for split_id in range(shap_calculator.n_splits_) - ] + def _get_context(self, shap_calculator: ShapCalculator) -> ShapContext: + return ShapValueContext(shap_calculator=shap_calculator) - def _calculate(self, contexts: Iterable[ShapContext]) -> None: + def _calculate(self, context: ShapContext) -> None: # calculate association matrices for each SHAP context, then aggregate - self.association_ = AffinityMatrix.aggregate( - affinity_matrices=map(self._calculate_association, contexts) - ) + self.association_ = self._calculate_association(context) @inheritdoc(match="""[see superclass]""") @@ -179,32 +174,20 @@ def redundancy( symmetrical=symmetrical, absolute=absolute, std=std ) - def _get_context(self, shap_calculator: ShapCalculator) -> List[ShapContext]: - return [ - ShapInteractionValueContext( - shap_calculator=shap_calculator, split_id=split_id - ) - for split_id in range(shap_calculator.n_splits_) - ] - - def _calculate(self, contexts: Iterable[ShapContext]) -> None: - # calculate association, synergy, and redundancy matrices for each SHAP context, - # then aggregate each of them - self.association_, self.synergy_, self.redundancy_ = map( - AffinityMatrix.aggregate, - zip( - *( - ( - self._calculate_association(context=context), - *self._calculate_synergy_redundancy(context=context), - ) - for context in contexts - ) - ), + def _get_context(self, shap_calculator: ShapCalculator) -> ShapContext: + return ShapInteractionValueContext(shap_calculator=shap_calculator) + + def _calculate(self, context: ShapContext) -> None: + # calculate association, synergy, and redundancy matrices for the SHAP context + + self.association_ = self._calculate_association(context=context) + self.synergy_, self.redundancy_ = self._calculate_synergy_redundancy( + context=context ) + @staticmethod def _calculate_synergy_redundancy( - self, context: ShapContext + context: ShapContext, ) -> Tuple[AffinityMatrix, AffinityMatrix]: p_i = context.p_i var_p_i = context.var_p_i diff --git 
a/test/test/conftest.py b/test/test/conftest.py index bc584ab6..45b14966 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -200,15 +200,12 @@ def regressor_inspector( best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], n_jobs: int ) -> LearnerInspector: inspector = LearnerInspector( + pipeline=best_lgbm_crossfit.pipeline, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", use_background_dataset=True ), n_jobs=n_jobs, - ).fit(crossfit=best_lgbm_crossfit) - - # disable legacy calculations; we used them in the constructor so the legacy - # SHAP decomposer is created along with the new SHAP vector projector - inspector._legacy = False + ).fit(sample=best_lgbm_crossfit.sample_) return inspector @@ -402,9 +399,11 @@ def iris_inspector_multi_class( ], n_jobs: int, ) -> LearnerInspector[ClassifierPipelineDF[RandomForestClassifierDF]]: - return LearnerInspector(shap_interaction=True, n_jobs=n_jobs).fit( - crossfit=iris_classifier_crossfit_multi_class - ) + return LearnerInspector( + pipeline=iris_classifier_crossfit_multi_class.pipeline, + shap_interaction=True, + n_jobs=n_jobs, + ).fit(sample=iris_classifier_crossfit_multi_class.sample_) # diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 41d09613..00936107 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -3,7 +3,7 @@ """ import logging import warnings -from typing import List, Sequence, Set, TypeVar +from typing import List, Optional, Sequence, Set, TypeVar, Union import numpy as np import pandas as pd @@ -52,74 +52,49 @@ def test_model_inspection( ) -> None: # define checksums for this test - expected_scores = [0.418, 0.400, 0.386, 0.385, 0.122] + [ - 0.122, - -0.074, - -0.074, - -0.074, - -0.074, - ] - log.debug(f"\n{regressor_ranker.summary_report()}") check_ranking( ranking=regressor_ranker.ranking_, - expected_scores=expected_scores, + expected_scores=( + [0.418, 0.400, 0.386, 0.385, 0.122, 0.122, -0.074, -0.074, -0.074, -0.074] + ), expected_learners=None, expected_parameters=None, ) - # using an invalid consolidation method raises an exception - with pytest.raises(ValueError, match="unknown aggregation method: invalid"): - regressor_inspector.shap_values(aggregation="invalid") - - shap_values_raw: pd.DataFrame = regressor_inspector.shap_values(aggregation=None) - shap_values_mean = regressor_inspector.shap_values( - aggregation=LearnerInspector.AGG_MEAN - ) - shap_values_std = regressor_inspector.shap_values( - aggregation=LearnerInspector.AGG_STD - ) - - # method shap_values without parameter is equal to "mean" consolidation - assert_frame_equal(shap_values_mean, regressor_inspector.shap_values()) + shap_values: pd.DataFrame = regressor_inspector.shap_values() # the length of rows in shap_values should be equal to the unique observation # indices we have had in the predictions_df - assert len(shap_values_mean) == len(sample) + assert len(shap_values) == len(sample) # index names - assert shap_values_mean.index.names == [Sample.IDX_OBSERVATION] - assert shap_values_mean.columns.names == [Sample.IDX_FEATURE] - assert shap_values_std.index.names == [Sample.IDX_OBSERVATION] - assert shap_values_std.columns.names == [Sample.IDX_FEATURE] - assert shap_values_raw.index.names == ["split", "observation"] - assert shap_values_raw.columns.names == [Sample.IDX_FEATURE] + assert shap_values.index.names == [Sample.IDX_OBSERVATION] + assert shap_values.columns.names == [Sample.IDX_FEATURE] # column index - assert 
set(shap_values_mean.columns) == feature_names + assert set(shap_values.columns) == feature_names # check that the SHAP values add up to the predictions - shap_totals_raw = shap_values_raw.sum(axis=1) + shap_totals = shap_values.sum(axis=1) - for split_id, model in enumerate(best_lgbm_crossfit.models()): - # for each model in the crossfit, calculate the difference between total - # SHAP values and prediction for every observation. This is always the same - # constant value, so `mad` (mean absolute deviation) is zero + # calculate the difference between total SHAP values and prediction + # for every observation. This is always the same constant value, + # therefore the mean absolute deviation is zero - shap_minus_pred = shap_totals_raw.xs(key=split_id) - model.predict( - X=sample.features - ) - assert ( - round(shap_minus_pred.mad(), 12) == 0.0 - ), f"predictions matching total SHAP for split {split_id}" + shap_minus_pred = shap_totals - best_lgbm_crossfit.pipeline.predict( + X=sample.features + ) + assert round(shap_minus_pred.mad(), 12) == 0.0, "predictions matching total SHAP" # test the ModelInspector with a KernelExplainer: inspector_2 = LearnerInspector( + pipeline=best_lgbm_crossfit.pipeline, explainer_factory=KernelExplainerFactory(link="identity", data_size_limit=20), n_jobs=n_jobs, - ).fit(crossfit=best_lgbm_crossfit) + ).fit(sample=best_lgbm_crossfit.sample_) inspector_2.shap_values() linkage_tree = inspector_2.feature_association_linkage() @@ -149,26 +124,23 @@ def test_model_inspection_classifier_binary( iris_sample_binary: Sample, iris_classifier_crossfit_binary, n_jobs: int ) -> None: - model_inspector = LearnerInspector(shap_interaction=False, n_jobs=n_jobs).fit( - crossfit=iris_classifier_crossfit_binary - ) + model_inspector = LearnerInspector( + pipeline=iris_classifier_crossfit_binary.pipeline, + shap_interaction=False, + n_jobs=n_jobs, + ).fit(sample=iris_classifier_crossfit_binary.sample_) # calculate the shap value matrix, without any consolidation - shap_values = model_inspector.shap_values(aggregation=None) + shap_values = model_inspector.shap_values() # do the shap values add up to predictions minus a constant value? _validate_shap_values_against_predictions( shap_values=shap_values, crossfit=iris_classifier_crossfit_binary ) - shap_matrix_mean = model_inspector.shap_values() - - # is the consolidation correct? 
- assert_frame_equal(shap_matrix_mean, shap_values.groupby(level=1).mean()) - # the length of rows in shap_values should be equal to the unique observation # indices we have had in the predictions_df - assert len(shap_matrix_mean) == len(iris_sample_binary) + assert len(shap_values) == len(iris_sample_binary) # Shap decomposition matrices (feature dependencies) @@ -180,10 +152,10 @@ def test_model_inspection_classifier_binary( association_matrix.values, np.array( [ - [np.nan, 0.692, 0.195, 0.052], - [0.692, np.nan, 0.290, 0.041], - [0.195, 0.290, np.nan, 0.081], - [0.052, 0.041, 0.081, np.nan], + [np.nan, 0.684, 0.368, 0.002], + [0.684, np.nan, 0.442, 0.000], + [0.368, 0.442, np.nan, 0.010], + [0.002, 0.000, 0.010, np.nan], ] ), atol=0.02, @@ -224,7 +196,7 @@ def test_model_inspection_classifier_binary_single_shap_output() -> None: ).fit(sample_df) # fit the inspector - LearnerInspector(n_jobs=-3).fit(crossfit=crossfit) + LearnerInspector(pipeline=crossfit.pipeline, n_jobs=-3).fit(sample=crossfit.sample_) # noinspection DuplicatedCode @@ -236,23 +208,13 @@ def test_model_inspection_classifier_multi_class( ) -> None: # calculate the shap value matrix, without any consolidation - shap_values = iris_inspector_multi_class.shap_values(aggregation=None) + shap_values = iris_inspector_multi_class.shap_values() # do the shap values add up to predictions minus a constant value? _validate_shap_values_against_predictions( shap_values=shap_values, crossfit=iris_classifier_crossfit_multi_class ) - shap_matrix_mean: List[pd.DataFrame] = iris_inspector_multi_class.shap_values() - - for _mean, _raw in zip(shap_matrix_mean, shap_values): - # is the consolidation correct? - assert_frame_equal(_mean, _raw.groupby(level=1).mean()) - - # the length of rows in shap_values should be equal to the unique observation - # indices we have had in the predictions_df - assert len(_mean) == len(iris_sample) - # Feature importance feature_importance: pd.DataFrame = iris_inspector_multi_class.feature_importance() @@ -266,10 +228,10 @@ def test_model_inspection_classifier_multi_class( feature_importance.values, np.array( [ - [0.125, 0.085, 0.104], - [0.020, 0.019, 0.010], - [0.424, 0.456, 0.461], - [0.432, 0.441, 0.425], + [0.122, 0.086, 0.102], + [0.020, 0.021, 0.007], + [0.433, 0.465, 0.481], + [0.424, 0.428, 0.410], ] ), atol=0.02, @@ -286,14 +248,14 @@ def test_model_inspection_classifier_multi_class( np.hstack([m.values for m in synergy_matrix]), np.array( [ - [np.nan, 0.009, 0.057, 0.055, np.nan, 0.042] - + [0.418, 0.418, np.nan, 0.004, 0.085, 0.097], - [0.101, np.nan, 0.052, 0.072, 0.094, np.nan] - + [0.117, 0.156, 0.090, np.nan, 0.237, 0.258], - [0.003, 0.001, np.nan, 0.002, 0.027, 0.005] - + [np.nan, 0.041, 0.012, 0.004, np.nan, 0.031], - [0.002, 0.000, 0.001, np.nan, 0.029, 0.005] - + [0.043, np.nan, 0.015, 0.005, 0.036, np.nan], + [np.nan, 0.008, 0.032, 0.037, np.nan, 0.002] + + [0.367, 0.343, np.nan, 0.001, 0.081, 0.067], + [0.124, np.nan, 0.042, 0.035, 0.094, np.nan] + + [0.061, 0.055, 0.160, np.nan, 0.643, 0.456], + [0.002, 0.000, np.nan, 0.003, 0.041, 0.008] + + [np.nan, 0.048, 0.015, 0.000, np.nan, 0.034], + [0.002, 0.000, 0.003, np.nan, 0.025, 0.009] + + [0.042, np.nan, 0.008, 0.012, 0.034, np.nan], ] ), atol=0.02, @@ -306,14 +268,14 @@ def test_model_inspection_classifier_multi_class( np.hstack([m.values for m in redundancy_matrix]), np.array( [ - [np.nan, 0.087, 0.643, 0.656, np.nan, 0.065] - + [0.265, 0.234, np.nan, 0.034, 0.594, 0.505], - [0.082, np.nan, 0.297, 0.292, 0.064, np.nan] - + [0.117, 0.171, 
0.031, np.nan, 0.024, 0.021], - [0.682, 0.314, np.nan, 0.996, 0.471, 0.130] - + [np.nan, 0.743, 0.642, 0.031, np.nan, 0.761], - [0.695, 0.315, 0.997, np.nan, 0.406, 0.194] - + [0.741, np.nan, 0.550, 0.028, 0.756, np.nan], + [np.nan, 0.080, 0.734, 0.721, np.nan, 0.156] + + [0.327, 0.315, np.nan, 0.002, 0.671, 0.610], + [0.071, np.nan, 0.382, 0.388, 0.142, np.nan] + + [0.333, 0.403, 0.002, np.nan, 0.039, 0.021], + [0.757, 0.398, np.nan, 0.995, 0.495, 0.352] + + [np.nan, 0.741, 0.720, 0.109, np.nan, 0.754], + [0.747, 0.402, 0.995, np.nan, 0.468, 0.423] + + [0.746, np.nan, 0.649, 0.038, 0.753, np.nan], ] ), atol=0.02, @@ -326,14 +288,14 @@ def test_model_inspection_classifier_multi_class( np.hstack([m.values for m in association_matrix]), np.array( [ - [np.nan, 0.077, 0.662, 0.670, np.nan, 0.046] - + [0.370, 0.334, np.nan, 0.031, 0.634, 0.550], - [0.077, np.nan, 0.301, 0.295, 0.046, np.nan] - + [0.127, 0.173, 0.031, np.nan, 0.025, 0.020], - [0.662, 0.301, np.nan, 0.998, 0.370, 0.127] - + [np.nan, 0.783, 0.634, 0.025, np.nan, 0.790], - [0.670, 0.295, 0.998, np.nan, 0.334, 0.173] - + [0.783, np.nan, 0.550, 0.020, 0.790, np.nan], + [np.nan, 0.087, 0.746, 0.735, np.nan, 0.132] + + [0.466, 0.419, np.nan, 0.003, 0.719, 0.643], + [0.087, np.nan, 0.387, 0.390, 0.132, np.nan] + + [0.357, 0.428, 0.003, np.nan, 0.034, 0.046], + [0.746, 0.387, np.nan, 0.998, 0.466, 0.357] + + [np.nan, 0.788, 0.719, 0.034, np.nan, 0.787], + [0.735, 0.390, 0.998, np.nan, 0.419, 0.428] + + [0.788, np.nan, 0.643, 0.046, 0.787, np.nan], ] ), atol=0.02, @@ -359,62 +321,59 @@ def _validate_shap_values_against_predictions( # calculate the matching predictions, so we can check if the SHAP values add up # correctly - predicted_probabilities_per_split: List[pd.DataFrame] = [ - model.predict_proba(crossfit.sample_.features.iloc[test_split, :]) - for model, (_, test_split) in zip(crossfit.models(), crossfit.splits()) - ] - - for split, predicted_probabilities in enumerate(predicted_probabilities_per_split): - - assert isinstance( - predicted_probabilities, pd.DataFrame - ), "predicted probabilities are single-output" - - expected_probability_range = 1 / len(predicted_probabilities.columns) - - def _check_probabilities( - _class_probabilities: pd.DataFrame, _shap_for_split_and_class: pd.Series - ) -> None: - expected_probability = _class_probabilities.join( - _shap_for_split_and_class - ).sum(axis=1) - - expected_probability_min = expected_probability.min() - expected_probability_max = expected_probability.max() - assert expected_probability_min == pytest.approx( - expected_probability_max - ), "expected probability is the same for all explanations" - assert ( - expected_probability_range * 0.6 - <= expected_probability_min - <= expected_probability_range / 0.6 - ), ( - "expected class probability is roughly in the range of " - f"{expected_probability_range * 100:.0f}%" - ) + predicted_probabilities: pd.DataFrame = crossfit.pipeline.predict_proba( + crossfit.sample_.features + ) - if predicted_probabilities.shape[1] == 2: - # for binary classification we have SHAP values only for the second class - _check_probabilities( - predicted_probabilities.iloc[:, [1]], - -shap_values.xs(split).sum(axis=1).rename("shap"), - ) + assert isinstance( + predicted_probabilities, pd.DataFrame + ), "predicted probabilities are single-output" - else: - # multi-class classification has outputs for each class + expected_probability_range = 1 / len(predicted_probabilities.columns) - for class_idx, class_name in enumerate(predicted_probabilities.columns): - # 
for each observation and class, we expect to get the constant - # expected probability value by deducting the SHAP values for all - # features from the predicted probability + def _check_probabilities( + _class_probabilities: pd.DataFrame, _shap_for_split_and_class: pd.Series + ) -> None: + expected_probability = _class_probabilities.join(_shap_for_split_and_class).sum( + axis=1 + ) - class_probabilities = predicted_probabilities.loc[:, [class_name]] + expected_probability_min = expected_probability.min() + expected_probability_max = expected_probability.max() + assert expected_probability_min == pytest.approx( + expected_probability_max + ), "expected probability is the same for all explanations" + assert ( + expected_probability_range * 0.6 + <= expected_probability_min + <= expected_probability_range / 0.6 + ), ( + "expected class probability is roughly in the range of " + f"{expected_probability_range * 100:.0f}%" + ) - shap_for_split_and_class = ( - -shap_values[class_idx].xs(split).sum(axis=1).rename("shap") - ) + if predicted_probabilities.shape[1] == 2: + # for binary classification we have SHAP values only for the second class + _check_probabilities( + predicted_probabilities.iloc[:, [1]], + -shap_values.sum(axis=1).rename("shap"), + ) - _check_probabilities(class_probabilities, shap_for_split_and_class) + else: + # multi-class classification has outputs for each class + + for class_idx, class_name in enumerate(predicted_probabilities.columns): + # for each observation and class, we expect to get the constant + # expected probability value by deducting the SHAP values for all + # features from the predicted probability + + class_probabilities = predicted_probabilities.loc[:, [class_name]] + + shap_for_split_and_class = ( + -shap_values[class_idx].sum(axis=1).rename("shap") + ) + + _check_probabilities(class_probabilities, shap_for_split_and_class) # noinspection DuplicatedCode @@ -428,30 +387,21 @@ def test_model_inspection_classifier_interaction( warnings.filterwarnings("ignore", message="You are accessing a training score") model_inspector = LearnerInspector( + pipeline=iris_classifier_crossfit_binary.pipeline, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", use_background_dataset=True ), n_jobs=n_jobs, - ).fit(crossfit=iris_classifier_crossfit_binary) - - model_inspector_full_sample = LearnerInspector( - explainer_factory=TreeExplainerFactory( - feature_perturbation="tree_path_dependent", use_background_dataset=True - ), - n_jobs=n_jobs, - ).fit(crossfit=iris_classifier_crossfit_binary, full_sample=True) - - # disable legacy calculations; we used them in the constructor so the legacy - # SHAP decomposer is created along with the new SHAP vector projector - model_inspector._legacy = False + ).fit(sample=iris_classifier_crossfit_binary.sample_) model_inspector_no_interaction = LearnerInspector( + pipeline=iris_classifier_crossfit_binary.pipeline, shap_interaction=False, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", use_background_dataset=True ), n_jobs=n_jobs, - ).fit(crossfit=iris_classifier_crossfit_binary) + ).fit(sample=iris_classifier_crossfit_binary.sample_) # calculate shap interaction values shap_interaction_values = model_inspector.shap_interaction_values() @@ -483,14 +433,12 @@ def test_model_inspection_classifier_interaction( # do the shap values add up to predictions minus a constant value? 
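    # (for each observation, the SHAP values sum to the prediction minus the
    # explainer's expected value, which is one constant across the whole sample;
    # summing the interaction values over the feature level of the row index
    # recovers the per-feature SHAP values)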
_validate_shap_values_against_predictions( - shap_values=model_inspector.shap_interaction_values(aggregation=None) - .groupby(level=[0, 1]) - .sum(), + shap_values=model_inspector.shap_interaction_values().groupby(level=0).sum(), crossfit=iris_classifier_crossfit_binary, ) assert model_inspector.feature_importance().values == pytest.approx( - np.array([0.063, 0.013, 0.492, 0.431]), abs=0.02 + np.array([0.054, 0.019, 0.451, 0.477]), abs=0.02 ) try: @@ -524,19 +472,19 @@ def test_model_inspection_classifier_interaction( atol=0.02, ) - synergy_matrix = model_inspector.feature_synergy_matrix(clustered=True) assert_allclose( - synergy_matrix.values, + model_inspector.feature_synergy_matrix(clustered=True).values, np.array( [ - [np.nan, 0.000, 0.001, 0.004], - [0.149, np.nan, 0.045, 0.157], - [0.040, 0.004, np.nan, 0.044], - [0.003, 0.001, 0.001, np.nan], + [np.nan, 0.000, 0.000, 0.001], + [0.386, np.nan, 0.108, 0.314], + [0.005, 0.002, np.nan, 0.059], + [0.002, 0.000, 0.001, np.nan], ] ), atol=0.02, ) + assert_allclose( model_inspector.feature_synergy_matrix(absolute=True).values, np.array( @@ -549,30 +497,17 @@ def test_model_inspection_classifier_interaction( ), atol=0.02, ) - assert_allclose( - model_inspector_full_sample.feature_synergy_matrix(clustered=True).values, - np.array( - [ - [np.nan, 0.000, 0.000, 0.001], - [0.386, np.nan, 0.108, 0.314], - [0.005, 0.002, np.nan, 0.059], - [0.002, 0.000, 0.001, np.nan], - ] - ), - atol=0.02, - ) - redundancy_matrix = model_inspector.feature_redundancy_matrix( - clustered=False, symmetrical=True - ) assert_allclose( - redundancy_matrix.values, + model_inspector.feature_redundancy_matrix( + clustered=False, symmetrical=True + ).values, np.array( [ - [np.nan, 0.080, 0.316, 0.208], - [0.080, np.nan, 0.036, 0.044], - [0.316, 0.036, np.nan, 0.691], - [0.208, 0.044, 0.691, np.nan], + [np.nan, 0.013, 0.462, 0.383], + [0.013, np.nan, 0.000, 0.003], + [0.462, 0.000, np.nan, 0.677], + [0.383, 0.003, 0.677, np.nan], ] ), atol=0.02, @@ -583,51 +518,36 @@ def test_model_inspection_classifier_interaction( ).values, np.array( [ - [np.nan, 0.316, 0.052, 0.010], - [0.316, np.nan, 0.087, 0.009], - [0.052, 0.087, np.nan, 0.004], - [0.010, 0.009, 0.004, np.nan], + [np.nan, 0.314, 0.102, 0.001], + [0.314, np.nan, 0.116, 0.000], + [0.102, 0.116, np.nan, 0.000], + [0.001, 0.000, 0.000, np.nan], ] ), atol=0.02, ) - redundancy_matrix = model_inspector.feature_redundancy_matrix(clustered=True) assert_allclose( - redundancy_matrix.values, + model_inspector.feature_redundancy_matrix(clustered=True).values, np.array( [ - [np.nan, 0.691, 0.209, 0.045], - [0.692, np.nan, 0.317, 0.037], - [0.201, 0.303, np.nan, 0.081], - [0.040, 0.031, 0.076, np.nan], - ] - ), - atol=0.02, - ) - assert_allclose( - model_inspector.feature_redundancy_matrix(absolute=True).values, - np.array( - [ - [np.nan, 0.294, 0.092, 0.020], - [0.337, np.nan, 0.154, 0.017], - [0.013, 0.020, np.nan, 0.006], - [0.001, 0.001, 0.001, np.nan], + [np.nan, 0.677, 0.384, 0.003], + [0.676, np.nan, 0.465, 0.000], + [0.382, 0.438, np.nan, 0.013], + [0.002, 0.000, 0.012, np.nan], ] ), atol=0.02, ) assert_allclose( - model_inspector_full_sample.feature_redundancy_matrix( - clustered=True - ).values, + model_inspector.feature_redundancy_matrix(absolute=True).values, np.array( [ - [np.nan, 0.677, 0.384, 0.003], - [0.676, np.nan, 0.465, 0.000], - [0.382, 0.438, np.nan, 0.013], - [0.002, 0.000, 0.012, np.nan], + [np.nan, 0.323, 0.183, 0.002], + [0.305, np.nan, 0.209, 0.000], + [0.021, 0.024, np.nan, 0.001], + [0.000, 0.000, 
0.000, np.nan], ] ), atol=0.02, @@ -640,65 +560,51 @@ def test_model_inspection_classifier_interaction( association_matrix.values, np.array( [ - [np.nan, 0.074, 0.309, 0.205], - [0.074, np.nan, 0.030, 0.040], - [0.309, 0.030, np.nan, 0.694], - [0.205, 0.040, 0.694, np.nan], + [np.nan, 0.009, 0.447, 0.383], + [0.009, np.nan, 0.000, 0.001], + [0.447, 0.000, np.nan, 0.678], + [0.383, 0.001, 0.678, np.nan], ] ), atol=0.02, ) + assert_allclose( model_inspector.feature_association_matrix( absolute=True, symmetrical=True ).values, np.array( [ - [np.nan, 0.317, 0.051, 0.009], - [0.317, np.nan, 0.085, 0.007], - [0.051, 0.085, np.nan, 0.003], - [0.009, 0.007, 0.003, np.nan], + [np.nan, 0.314, 0.102, 0.000], + [0.314, np.nan, 0.113, 0.000], + [0.102, 0.113, np.nan, 0.000], + [0.000, 0.000, 0.000, np.nan], ] ), atol=0.02, ) - association_matrix = model_inspector.feature_association_matrix(clustered=True) assert_allclose( - association_matrix.values, + model_inspector.feature_association_matrix(clustered=True).values, np.array( [ - [np.nan, 0.694, 0.205, 0.040], - [0.694, np.nan, 0.309, 0.030], - [0.205, 0.309, np.nan, 0.074], - [0.040, 0.030, 0.074, np.nan], - ] - ), - atol=0.02, - ) - assert_allclose( - model_inspector.feature_association_matrix(absolute=True).values, - np.array( - [ - [np.nan, 0.295, 0.090, 0.018], - [0.338, np.nan, 0.150, 0.014], - [0.013, 0.020, np.nan, 0.005], - [0.001, 0.001, 0.001, np.nan], + [np.nan, 0.678, 0.383, 0.001], + [0.678, np.nan, 0.447, 0.000], + [0.383, 0.447, np.nan, 0.009], + [0.001, 0.000, 0.009, np.nan], ] ), atol=0.02, ) assert_allclose( - model_inspector_full_sample.feature_association_matrix( - clustered=True - ).values, + model_inspector.feature_association_matrix(absolute=True).values, np.array( [ - [np.nan, 0.678, 0.383, 0.001], - [0.678, np.nan, 0.447, 0.000], - [0.383, 0.447, np.nan, 0.009], - [0.001, 0.000, 0.009, np.nan], + [np.nan, 0.323, 0.182, 0.001], + [0.305, np.nan, 0.201, 0.000], + [0.021, 0.024, np.nan, 0.000], + [0.000, 0.000, 0.000, np.nan], ] ), atol=0.02, @@ -735,9 +641,9 @@ def test_model_inspection_classifier_interaction_dual_target( f"{iris_target_name}.*{iris_target_name}2" ), ): - LearnerInspector(n_jobs=n_jobs).fit( - crossfit=iris_classifier_crossfit_dual_target - ) + LearnerInspector( + pipeline=iris_classifier_crossfit_dual_target.pipeline, n_jobs=n_jobs + ).fit(sample=iris_classifier_crossfit_dual_target.sample_) def test_shap_plot_data( @@ -769,17 +675,21 @@ def test_shap_plot_data( # -def print_expected_matrix(error: AssertionError, split: bool = False): +def print_expected_matrix(error: AssertionError, *, split: bool = False): # print expected output for copy/paste into assertion statement import re - matrix: List[List[float]] = eval( - re.search(r"array\(([^)]+)\)", error.args[0])[1] - .replace(r"\n", "\n") - .replace("nan", "np.nan") - ) + array: Optional[re.Match] = re.search(r"array\(([^)]+)\)", error.args[0]) + if array is not None: + matrix: List[List[float]] = eval( + array[1].replace(r"\n", "\n").replace("nan", "np.nan") + ) + + print_matrix(matrix, split=split) + +def print_matrix(matrix: Union[List[List[float]], np.ndarray], *, split: bool): print("==== matrix assertion failed ====\nExpected Matrix:") print("[") for row in matrix: From 564227dfd27356d1b5693c168a8671cfbc3ea129 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Sun, 17 Oct 2021 14:41:36 +0200 Subject: [PATCH 020/106] REFACTOR: simplify signature of ShapCalculator._calculate_shap() --- src/facet/inspection/_shap.py | 114 ++++++++++++---------------------- 1 
file changed, 41 insertions(+), 73 deletions(-) diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 2c1a09dc..06f0cef6 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -188,9 +188,10 @@ def get_multi_output_type() -> str: """ @abstractmethod - def _get_multi_output_names( - self, model: T_LearnerPipelineDF, sample: Sample - ) -> List[str]: + def get_multi_output_names(self, sample: Sample) -> List[str]: + """ + :return: a name for each of the outputs + """ pass def _get_shap(self, sample: Sample) -> pd.DataFrame: @@ -242,36 +243,18 @@ def _make_explainer(_model: T_LearnerPipelineDF) -> BaseExplainer: # we explain the full sample using the model fitted on the full sample # so the result is a list with a single data frame of shap values - return self._calculate_shap( - model=pipeline, - sample=sample, - explainer=_make_explainer(pipeline), - features_out=self.feature_index_, - shap_matrix_to_df_fn=self._convert_raw_shap_to_df, - multi_output_type=self.get_multi_output_type(), - multi_output_names=self._get_multi_output_names( - model=pipeline, sample=sample - ), - ) + return self._calculate_shap(sample=sample, explainer=_make_explainer(pipeline)) - @staticmethod @abstractmethod def _calculate_shap( - *, - model: LearnerPipelineDF, - sample: Sample, - explainer: BaseExplainer, - features_out: pd.Index, - shap_matrix_to_df_fn: ShapToDataFrameFunction, - multi_output_type: str, - multi_output_names: Sequence[str], + self, *, sample: Sample, explainer: BaseExplainer ) -> pd.DataFrame: pass - @staticmethod def _convert_shap_tensors_to_list( + self, + *, shap_tensors: Union[np.ndarray, Sequence[np.ndarray]], - multi_output_type: str, multi_output_names: Sequence[str], ): def _validate_shap_tensor(_t: np.ndarray) -> None: @@ -290,11 +273,7 @@ def _validate_shap_tensor(_t: np.ndarray) -> None: _validate_shap_tensor(shap_tensor) else: _validate_shap_tensor(shap_tensors) - if ( - n_outputs == 2 - and multi_output_type - == ClassifierShapCalculator.get_multi_output_type() - ): + if n_outputs == 2 and isinstance(self, ClassifierShapCalculator): # if we have a single output *and* binary classification, the explainer # will have returned a single tensor for the positive class; # the SHAP values for the negative class will have the opposite sign @@ -312,20 +291,22 @@ def _validate_shap_tensor(_t: np.ndarray) -> None: return shap_tensors - @staticmethod - def _preprocess_features(model: LearnerPipelineDF, sample: Sample) -> pd.DataFrame: + def _preprocess_features(self, sample: Sample) -> pd.DataFrame: # get the out-of-bag subsample of the training sample, with feature columns # in the sequence that was used to fit the learner + # get the model + pipeline = self.pipeline + # get the features of all out-of-bag observations x = sample.features # pre-process the features - if model.preprocessing is not None: - x = model.preprocessing.transform(x) + if pipeline.preprocessing is not None: + x = pipeline.preprocessing.transform(x) # re-index the features to fit the sequence that was used to fit the learner - return x.reindex(columns=model.final_estimator.feature_names_in_, copy=False) + return x.reindex(columns=pipeline.final_estimator.feature_names_in_, copy=False) @staticmethod @abstractmethod @@ -376,18 +357,10 @@ def get_shap_interaction_values(self) -> pd.DataFrame: "is not defined" ) - @staticmethod def _calculate_shap( - *, - model: LearnerPipelineDF, - sample: Sample, - explainer: BaseExplainer, - features_out: pd.Index, - shap_matrix_to_df_fn: 
ShapToDataFrameFunction, - multi_output_type: str, - multi_output_names: Sequence[str], + self, *, sample: Sample, explainer: BaseExplainer ) -> pd.DataFrame: - x = ShapCalculator._preprocess_features(model=model, sample=sample) + x = self._preprocess_features(sample=sample) if x.isna().values.any(): log.warning( @@ -395,11 +368,13 @@ def _calculate_shap( "try to change preprocessing to impute all NaN values" ) + multi_output_type = self.get_multi_output_type() + multi_output_names = self.get_multi_output_names(sample=sample) + features_out = self.feature_index_ + # calculate the shap values, and ensure the result is a list of arrays - shap_values: List[np.ndarray] = ShapCalculator._convert_shap_tensors_to_list( - shap_tensors=explainer.shap_values(x), - multi_output_type=multi_output_type, - multi_output_names=multi_output_names, + shap_values: List[np.ndarray] = self._convert_shap_tensors_to_list( + shap_tensors=explainer.shap_values(x), multi_output_names=multi_output_names ) # convert to a data frame per output (different logic depending on whether @@ -407,7 +382,7 @@ def _calculate_shap( # shap_matrix_for_split_to_df_fn) shap_values_df_per_output: List[pd.DataFrame] = [ shap.reindex(columns=features_out, copy=False, fill_value=0.0) - for shap in shap_matrix_to_df_fn(shap_values, x.index, x.columns) + for shap in self._convert_raw_shap_to_df(shap_values, x.index, x.columns) ] # if we have a single output, return the data frame for that output; @@ -472,18 +447,10 @@ def get_diagonals(self) -> pd.DataFrame: columns=interaction_matrix.columns, ) - @staticmethod def _calculate_shap( - *, - model: LearnerPipelineDF, - sample: Sample, - explainer: BaseExplainer, - features_out: pd.Index, - shap_matrix_to_df_fn: ShapToDataFrameFunction, - multi_output_type: str, - multi_output_names: Sequence[str], + self, *, sample: Sample, explainer: BaseExplainer ) -> pd.DataFrame: - x = ShapCalculator._preprocess_features(model=model, sample=sample) + x = self._preprocess_features(sample=sample) # calculate the im values (returned as an array) try: @@ -494,12 +461,13 @@ def _calculate_shap( "Explainer does not implement method shap_interaction_values" ) + multi_output_type = self.get_multi_output_type() + multi_output_names = self.get_multi_output_names(sample) + features_out = self.feature_index_ + # calculate the shap interaction values; ensure the result is a list of arrays - shap_interaction_tensors: List[ - np.ndarray - ] = ShapCalculator._convert_shap_tensors_to_list( + shap_interaction_tensors: List[np.ndarray] = self._convert_shap_tensors_to_list( shap_tensors=shap_interaction_values_fn(x), - multi_output_type=multi_output_type, multi_output_names=multi_output_names, ) @@ -513,7 +481,9 @@ def _calculate_shap( copy=False, fill_value=0.0, ) - for im in shap_matrix_to_df_fn(shap_interaction_tensors, x.index, x.columns) + for im in self._convert_raw_shap_to_df( + shap_interaction_tensors, x.index, x.columns + ) ] # if we have a single output, use the data frame for that output; @@ -545,9 +515,8 @@ def get_multi_output_type() -> str: """[see superclass]""" return Sample.IDX_TARGET - def _get_multi_output_names( - self, model: RegressorPipelineDF, sample: Sample - ) -> List[str]: + def get_multi_output_names(self, sample: Sample) -> List[str]: + """[see superclass]""" # noinspection PyProtectedMember return sample._target_names @@ -652,15 +621,14 @@ def get_multi_output_type() -> str: """[see superclass]""" return ClassifierShapCalculator.COL_CLASS - def _get_multi_output_names( - self, model: 
ClassifierPipelineDF, sample: Sample - ) -> List[str]: + def get_multi_output_names(self, sample: Sample) -> List[str]: + """[see superclass]""" assert isinstance( sample.target, pd.Series ), "only single-output classifiers are currently supported" - root_classifier = model.final_estimator.native_estimator + root_classifier = self.pipeline.final_estimator.native_estimator # noinspection PyUnresolvedReferences - return [str(class_) for class_ in root_classifier.classes_] + return list(map(str, root_classifier.classes_)) class ClassifierShapValuesCalculator( From 45d49eb9e6e2d57b354945970ec215a5f000de30 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 09:13:18 +0200 Subject: [PATCH 021/106] REFACTOR: use sklearn.base.is_classifier to check for classifier --- src/facet/inspection/_inspection.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index 170b6016..05a474b7 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -9,6 +9,7 @@ import pandas as pd from scipy.cluster import hierarchy from scipy.spatial import distance +from sklearn.base import is_classifier from pytools.api import AllTracker, inheritdoc from pytools.data import LinkageTree, Matrix @@ -239,20 +240,14 @@ def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: learner: LearnerDF = self.pipeline.final_estimator - if isinstance(learner, ClassifierDF): - if isinstance(sample.target_name, list): - raise ValueError( - "only single-output classifiers (binary or multi-class) are " - "supported, but the classifier in the given crossfit has been " - "fitted on multiple columns " - f"{sample.target_name}" - ) - - is_classifier = True - - else: - assert isinstance(learner, RegressorDF) - is_classifier = False + _is_classifier = is_classifier(learner) + if _is_classifier and isinstance(sample.target_name, list): + raise ValueError( + "only single-output classifiers (binary or multi-class) are " + "supported, but the classifier in the given crossfit has been " + "fitted on multiple columns " + f"{sample.target_name}" + ) shap_global_projector: Union[ ShapVectorProjector, ShapInteractionVectorProjector, None @@ -261,7 +256,7 @@ def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: if self.shap_interaction: shap_calculator_type = ( ClassifierShapInteractionValuesCalculator - if is_classifier + if _is_classifier else RegressorShapInteractionValuesCalculator ) shap_calculator = shap_calculator_type( @@ -278,7 +273,7 @@ def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: else: shap_calculator_type = ( ClassifierShapValuesCalculator - if is_classifier + if _is_classifier else RegressorShapValuesCalculator ) shap_calculator = shap_calculator_type( From bdc9f18d39cd779124a540aa7a0bd8e9d0a84a50 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 09:14:06 +0200 Subject: [PATCH 022/106] FIX: return type of make_explainer is BaseExplainer --- src/facet/inspection/_explainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py index b5d75410..1e1809c4 100644 --- a/src/facet/inspection/_explainer.py +++ b/src/facet/inspection/_explainer.py @@ -227,7 +227,7 @@ def uses_background_dataset(self) -> bool: def make_explainer( self, model: LearnerDF, data: Optional[pd.DataFrame] = None - ) -> Explainer: + ) -> BaseExplainer: """[see 
superclass]""" self._validate_background_dataset(data=data) @@ -309,7 +309,7 @@ def uses_background_dataset(self) -> bool: """[see superclass]""" return True - def make_explainer(self, model: LearnerDF, data: pd.DataFrame) -> Explainer: + def make_explainer(self, model: LearnerDF, data: pd.DataFrame) -> BaseExplainer: """[see superclass]""" self._validate_background_dataset(data=data) From ed6ee600f23876ff415f8a4ea7588cc045b110ea Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 09:15:28 +0200 Subject: [PATCH 023/106] REFACTOR: use triple quotes for arg match of @inheritdoc --- src/facet/inspection/_explainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py index 1e1809c4..809bef50 100644 --- a/src/facet/inspection/_explainer.py +++ b/src/facet/inspection/_explainer.py @@ -169,7 +169,7 @@ def _validate_background_dataset(self, data: Optional[pd.DataFrame]) -> None: _TreeExplainer: Optional[type] = None -@inheritdoc(match="[see superclass]") +@inheritdoc(match="""[see superclass]""") class TreeExplainerFactory(ExplainerFactory): """ A factory constructing :class:`~shap.TreeExplainer` objects. @@ -266,7 +266,7 @@ def shap_interaction_values( pass -@inheritdoc(match="[see superclass]") +@inheritdoc(match="""[see superclass]""") class KernelExplainerFactory(ExplainerFactory): """ A factory constructing :class:`~shap.KernelExplainer` objects. From 27848c197d7363afa25a69260f7e4d1dff7d314b Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 09:25:59 +0200 Subject: [PATCH 024/106] REFACTOR: eliminate inner function make_explainer --- src/facet/inspection/_shap.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 06f0cef6..7bf551e0 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -225,25 +225,24 @@ def _get_shap(self, sample: Sample) -> pd.DataFrame: else: background_dataset = None - def _make_explainer(_model: T_LearnerPipelineDF) -> BaseExplainer: - return self._explainer_factory.make_explainer( - model=_model.final_estimator, - # we re-index the columns of the background dataset to match - # the column sequence of the model (in case feature order - # was shuffled, or train split pre-processing removed columns) - data=( - None - if background_dataset is None - else background_dataset.reindex( - columns=_model.final_estimator.feature_names_in_, - copy=False, - ) - ), - ) + explainer = self._explainer_factory.make_explainer( + model=pipeline.final_estimator, + # we re-index the columns of the background dataset to match + # the column sequence of the model (in case feature order + # was shuffled, or train split pre-processing removed columns) + data=( + None + if background_dataset is None + else background_dataset.reindex( + columns=pipeline.final_estimator.feature_names_in_, + copy=False, + ) + ), + ) # we explain the full sample using the model fitted on the full sample # so the result is a list with a single data frame of shap values - return self._calculate_shap(sample=sample, explainer=_make_explainer(pipeline)) + return self._calculate_shap(sample=sample, explainer=explainer) @abstractmethod def _calculate_shap( From b4a97c5b4bce8e243af5f09a5ef3dd8e09254666 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 10:31:08 +0200 Subject: [PATCH 025/106] API: make explainer factory arguments keyword-only --- 
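A quick sketch of the call-site impact (illustration only, not part of this
patch; the import path and argument values are assumptions):

    from facet.inspection import TreeExplainerFactory

    # positional construction now raises a TypeError:
    #   TreeExplainerFactory("raw", "tree_path_dependent")
    # with the new `*` marker, every argument must be passed by keyword:
    factory = TreeExplainerFactory(
        model_output="raw",
        feature_perturbation="tree_path_dependent",
    )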
 src/facet/inspection/_explainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py
index 809bef50..33ffb440 100644
--- a/src/facet/inspection/_explainer.py
+++ b/src/facet/inspection/_explainer.py
@@ -177,6 +177,7 @@ class TreeExplainerFactory(ExplainerFactory):
     def __init__(
         self,
+        *,
         model_output: Optional[str] = None,
         feature_perturbation: Optional[str] = None,
         use_background_dataset: bool = True,
@@ -274,6 +275,7 @@ class KernelExplainerFactory(ExplainerFactory):
     def __init__(
         self,
+        *,
         link: Optional[str] = None,
         l1_reg: Optional[str] = "num_features(10)",
         data_size_limit: Optional[int] = 100,

From 81615cb0a74f844cc984ff3c08188725d7aa8585 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Mon, 18 Oct 2021 10:36:06 +0200
Subject: [PATCH 026/106] API: rename TreeExplainerFactory arg
 use_background_dataset to uses_…

---
 src/facet/inspection/_explainer.py  | 6 +++---
 src/facet/inspection/_inspection.py | 2 +-
 test/test/conftest.py               | 2 +-
 test/test/facet/test_inspection.py  | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py
index 33ffb440..bc7c560c 100644
--- a/src/facet/inspection/_explainer.py
+++ b/src/facet/inspection/_explainer.py
@@ -180,13 +180,13 @@ def __init__(
         *,
         model_output: Optional[str] = None,
         feature_perturbation: Optional[str] = None,
-        use_background_dataset: bool = True,
+        uses_background_dataset: bool = True,
     ) -> None:
         """
         :param model_output: (optional) override the default model output parameter
         :param feature_perturbation: (optional) override the default
             feature_perturbation parameter
-        :param use_background_dataset: if ``False``, don't pass the background
+        :param uses_background_dataset: if ``False``, don't pass the background
             dataset on to the tree explainer even if a background dataset is passed
             to :meth:`.make_explainer`
         """
@@ -202,7 +202,7 @@ def __init__(
         )
         self.model_output = model_output
         self.feature_perturbation = feature_perturbation
-        self._uses_background_dataset = use_background_dataset
+        self._uses_background_dataset = uses_background_dataset

         global _TreeExplainer

diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py
index 05a474b7..c2b3f1f0 100644
--- a/src/facet/inspection/_inspection.py
+++ b/src/facet/inspection/_inspection.py
@@ -153,7 +153,7 @@ class LearnerInspector(
     #: This is a tree explainer using the tree_path_dependent method for
     #: feature perturbation, so we can calculate SHAP interaction values.
DEFAULT_EXPLAINER_FACTORY = TreeExplainerFactory( - feature_perturbation="tree_path_dependent", use_background_dataset=False + feature_perturbation="tree_path_dependent", uses_background_dataset=False ) def __init__( diff --git a/test/test/conftest.py b/test/test/conftest.py index 45b14966..caa9b53f 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -202,7 +202,7 @@ def regressor_inspector( inspector = LearnerInspector( pipeline=best_lgbm_crossfit.pipeline, explainer_factory=TreeExplainerFactory( - feature_perturbation="tree_path_dependent", use_background_dataset=True + feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, ).fit(sample=best_lgbm_crossfit.sample_) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 00936107..1fe0d71a 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -389,7 +389,7 @@ def test_model_inspection_classifier_interaction( model_inspector = LearnerInspector( pipeline=iris_classifier_crossfit_binary.pipeline, explainer_factory=TreeExplainerFactory( - feature_perturbation="tree_path_dependent", use_background_dataset=True + feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, ).fit(sample=iris_classifier_crossfit_binary.sample_) @@ -398,7 +398,7 @@ def test_model_inspection_classifier_interaction( pipeline=iris_classifier_crossfit_binary.pipeline, shap_interaction=False, explainer_factory=TreeExplainerFactory( - feature_perturbation="tree_path_dependent", use_background_dataset=True + feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, ).fit(sample=iris_classifier_crossfit_binary.sample_) From 8e590f607b0d1aeef8968a0ac22f6fabdb893615 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 10:39:09 +0200 Subject: [PATCH 027/106] API: base class ExplainerFactory on HasExpressionRepr --- src/facet/inspection/_explainer.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py index bc7c560c..a5058273 100644 --- a/src/facet/inspection/_explainer.py +++ b/src/facet/inspection/_explainer.py @@ -14,6 +14,8 @@ from sklearn.base import BaseEstimator from pytools.api import AllTracker, inheritdoc, validate_type +from pytools.expression import Expression, HasExpressionRepr +from pytools.expression.atomic import Id from sklearndf import ClassifierDF, LearnerDF, RegressorDF log = logging.getLogger(__name__) @@ -113,7 +115,7 @@ def shap_interaction_values( pass -class ExplainerFactory(metaclass=ABCMeta): +class ExplainerFactory(HasExpressionRepr, metaclass=ABCMeta): """ A factory for constructing :class:`~shap.Explainer` objects. 
""" @@ -250,6 +252,14 @@ def make_explainer( return explainer + def to_expression(self) -> Expression: + """[see superclass]""" + return Id(type(self))( + model_output=self.model_output, + feature_perturbation=self.feature_perturbation, + use_background_dataset=self._uses_background_dataset, + ) + class _KernelExplainer(shap.KernelExplainer, BaseExplainer): # noinspection PyPep8Naming,PyUnresolvedReferences @@ -353,5 +363,13 @@ def make_explainer(self, model: LearnerDF, data: pd.DataFrame) -> BaseExplainer: return explainer + def to_expression(self) -> Expression: + """[see superclass]""" + return Id(type(self))( + link=self.link, + l1_reg=self.l1_reg, + data_size_limit=self.data_size_limit, + ) + __tracker.validate() From 57bbde0f98ee071eac06aeb2806ef4c09e3819e2 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 10:39:59 +0200 Subject: [PATCH 028/106] API: state explainer when warning about ignoring shap_interaction=True --- src/facet/inspection/_inspection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index c2b3f1f0..02d956f4 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -206,7 +206,7 @@ def __init__( if not explainer_factory.supports_shap_interaction_values: log.warning( "ignoring arg shap_interaction=True: " - "explainers made by arg explainer_factory do not support " + f"explainers made by {explainer_factory!r} do not support " "SHAP interaction values" ) shap_interaction = False From 7419c2b7cdf10f95475ac7d97295d04bb989daa7 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 10:46:10 +0200 Subject: [PATCH 029/106] API: introduce class ParallelExplainer --- src/facet/inspection/_explainer.py | 236 ++++++++++++++++++++++++++++- src/facet/inspection/_shap.py | 11 +- 2 files changed, 241 insertions(+), 6 deletions(-) diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py index a5058273..af444f8c 100644 --- a/src/facet/inspection/_explainer.py +++ b/src/facet/inspection/_explainer.py @@ -5,7 +5,7 @@ import functools import logging from abc import ABCMeta, abstractmethod -from typing import Any, Dict, List, Mapping, Optional, Union +from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Union import numpy as np import pandas as pd @@ -16,6 +16,7 @@ from pytools.api import AllTracker, inheritdoc, validate_type from pytools.expression import Expression, HasExpressionRepr from pytools.expression.atomic import Id +from pytools.parallelization import Job, JobQueue, JobRunner, ParallelizableMixin from sklearndf import ClassifierDF, LearnerDF, RegressorDF log = logging.getLogger(__name__) @@ -23,11 +24,13 @@ __all__ = [ "BaseExplainer", "ExplainerFactory", + "ExplainerJob", + "ExplainerQueue", "KernelExplainerFactory", + "ParallelExplainer", "TreeExplainerFactory", ] - # # conditional and mock imports # @@ -60,7 +63,7 @@ # -# Class definitions +# Base classes # @@ -168,6 +171,226 @@ def _validate_background_dataset(self, data: Optional[pd.DataFrame]) -> None: ) +# +# Parallelization support: class ParallelExplainer and helper classes +# + + +@inheritdoc(match="""[see superclass]""") +class ExplainerJob(Job[Union[np.ndarray, List[np.ndarray]]]): + """ + A call to an explainer function with given `X` and `y` values. 
+ """ + + #: the explainer method to call + explain_fn: Callable[..., Union[np.ndarray, List[np.ndarray]]] + + #: the feature values of the observations to be explained + X: Union[np.ndarray, pd.DataFrame] + + #: the target values of the observations to be explained + y: Union[None, np.ndarray, pd.Series] + + #: additional arguments specific to the explainer method + kwargs: Dict[str, Any] + + # noinspection PyPep8Naming + def __init__( + self, + explain_fn: Callable[..., Union[np.ndarray, List[np.ndarray]]], + X: Union[np.ndarray, pd.DataFrame], + y: Union[None, np.ndarray, pd.Series] = None, + **kwargs: Any, + ) -> None: + """ + :param explain_fn: the explainer method to call + :param X: the feature values of the observations to be explained + :param y: the target values of the observations to be explained + :param kwargs: additional arguments specific to the explainer method + """ + self.explain_fn = explain_fn + self.X = X + self.y = y + self.kwargs = kwargs + + def run(self) -> Union[np.ndarray, List[np.ndarray]]: + """[see superclass]""" + if self.y is None: + return self.explain_fn(self.X, **self.kwargs) + else: + return self.explain_fn(self.X, self.y, **self.kwargs) + + +@inheritdoc(match="""[see superclass]""") +class ExplainerQueue( + JobQueue[Union[np.ndarray, List[np.ndarray]], Union[np.ndarray, List[np.ndarray]]] +): + """ + A queue splitting a data set to be explained into multiple jobs. + """ + + #: the explainer method to call + explain_fn: Callable[..., Union[np.ndarray, List[np.ndarray]]] + + #: the feature values of the observations to be explained + X: np.ndarray + + #: the target values of the observations to be explained + y: Optional[np.ndarray] + + #: the maximum number of observations to allocate to each job + max_job_size: int + + #: additional arguments specific to the explainer method + kwargs: Dict[str, Any] + + _DEFAULT_MAX_JOB_SIZE = 10 + + # noinspection PyPep8Naming + def __init__( + self, + explain_fn: Callable[..., Union[np.ndarray, List[np.ndarray]]], + X: Union[np.ndarray, pd.DataFrame], + y: Union[None, np.ndarray, pd.Series] = None, + *, + max_job_size: int = _DEFAULT_MAX_JOB_SIZE, + **kwargs: Any, + ) -> None: + """ + :param explain_fn: the explainer method to call + :param X: the feature values of the observations to be explained + :param y: the target values of the observations to be explained + :param max_job_size: the maximum number of observations to allocate to each job + :param kwargs: additional arguments specific to the explainer method + """ + super().__init__() + + self.explain_fn = explain_fn + self.X = X.values if isinstance(X, pd.DataFrame) else X + self.y = y.values if isinstance(y, pd.Series) else y + self.max_job_size = max_job_size + self.kwargs = kwargs + + def jobs(self) -> Iterable[Job[Union[np.ndarray, List[np.ndarray]]]]: + """[see superclass]""" + + x = self.X + y = self.y + n = len(x) + job_size = (n - 1) // len(self) + 1 + kwargs = self.kwargs + + return ( + ExplainerJob( + self.explain_fn, + X=x[start : start + job_size].copy(), + y=None if y is None else y[start : start + job_size].copy(), + **kwargs, + ) + for start in range(0, n, job_size) + ) + + def aggregate( + self, job_results: List[Union[np.ndarray, List[np.ndarray]]] + ) -> Union[np.ndarray, List[np.ndarray]]: + """[see superclass]""" + if isinstance(job_results[0], np.ndarray): + return np.vstack(job_results) + else: + return [np.vstack(arrays) for arrays in zip(*job_results)] + + def __len__(self) -> int: + return (len(self.X) - 1) // self.max_job_size + 1 + + 
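# Illustration (not part of this patch): a worked example of the chunking
# arithmetic above; `identity_fn` is a hypothetical stand-in for an explainer
# method with the same call signature as shap_values.
#
#     import numpy as np
#
#     identity_fn = lambda X: X
#     queue = ExplainerQueue(identity_fn, X=np.zeros((25, 4)), max_job_size=10)
#
#     # __len__ allocates (25 - 1) // 10 + 1 = 3 jobs; jobs() then slices X
#     # with job_size = (25 - 1) // 3 + 1 = 9, i.e. chunks of 9, 9, and 7 rows
#     assert len(queue) == 3
#
#     # aggregate() re-stacks the per-chunk SHAP arrays with np.vstack
#     # (or stacks each output separately when the explainer returns a list)
#     results = [job.run() for job in queue.jobs()]
#     assert queue.aggregate(results).shape == (25, 4)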
+@inheritdoc(match="""[see superclass]""") +class ParallelExplainer(BaseExplainer, ParallelizableMixin): + """ + A wrapper class, turning an explainer into a parallelized version, explaining + chunks of observations in parallel. + """ + + #: The explainer being parallelized by this wrapper + explainer: BaseExplainer + + # noinspection PyProtectedMember + _DEFAULT_MAX_JOB_SIZE = ExplainerQueue._DEFAULT_MAX_JOB_SIZE + + def __init__( + self, + explainer: BaseExplainer, + *, + max_job_size: int = _DEFAULT_MAX_JOB_SIZE, + n_jobs: int, + shared_memory: Optional[bool] = None, + pre_dispatch: Optional[Union[str, int]] = None, + verbose: Optional[int] = None, + ) -> None: + """ + :param explainer: the explainer to be parallelized by this wrapper + :param max_job_size: the maximum number of observations to allocate to any of + the explainer jobs running in parallel + """ + super().__init__( + n_jobs=n_jobs, + shared_memory=shared_memory, + pre_dispatch=pre_dispatch, + verbose=verbose, + ) + + if isinstance(explainer, ParallelExplainer): + log.warning( + f"creating parallel explainer from parallel explainer {explainer!r}" + ) + + self.explainer = explainer + self.max_job_size = max_job_size + + __init__.__doc__ += ParallelizableMixin.__init__.__doc__ + + # noinspection PyPep8Naming + def shap_values( + self, + X: Union[np.ndarray, pd.DataFrame, catboost.Pool], + y: Union[None, np.ndarray, pd.Series] = None, + **kwargs: Any, + ) -> Union[np.ndarray, List[np.ndarray]]: + """[see superclass]""" + return self._run(self.explainer.shap_values, X, y, **kwargs) + + # noinspection PyPep8Naming + def shap_interaction_values( + self, + X: Union[np.ndarray, pd.DataFrame, catboost.Pool], + y: Union[None, np.ndarray, pd.Series] = None, + **kwargs: Any, + ) -> Union[np.ndarray, List[np.ndarray]]: + """[see superclass]""" + return self._run(self.explainer.shap_interaction_values, X, y, **kwargs) + + # noinspection PyPep8Naming + def _run( + self, + explain_fn: Callable[..., Union[np.ndarray, List[np.ndarray]]], + X: Union[np.ndarray, pd.DataFrame, catboost.Pool], + y: Union[None, np.ndarray, pd.Series] = None, + **kwargs: Any, + ): + return JobRunner.from_parallelizable(self).run_queue( + ExplainerQueue( + explain_fn=explain_fn, + X=X, + y=y, + max_job_size=self.max_job_size, + **kwargs, + ) + ) + + +# +# TreeExplainer factory +# + _TreeExplainer: Optional[type] = None @@ -261,6 +484,11 @@ def to_expression(self) -> Expression: ) +# +# KernelExplainer factory +# + + class _KernelExplainer(shap.KernelExplainer, BaseExplainer): # noinspection PyPep8Naming,PyUnresolvedReferences def shap_interaction_values( @@ -274,8 +502,6 @@ def shap_interaction_values( """ raise NotImplementedError() - pass - @inheritdoc(match="""[see superclass]""") class KernelExplainerFactory(ExplainerFactory): diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 7bf551e0..cca53270 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -19,7 +19,7 @@ ) from ..data import Sample -from ._explainer import BaseExplainer, ExplainerFactory +from ._explainer import BaseExplainer, ExplainerFactory, ParallelExplainer log = logging.getLogger(__name__) @@ -240,6 +240,15 @@ def _get_shap(self, sample: Sample) -> pd.DataFrame: ), ) + if self.n_jobs != 1: + explainer = ParallelExplainer( + explainer, + n_jobs=self.n_jobs, + shared_memory=self.shared_memory, + pre_dispatch=self.pre_dispatch, + verbose=self.verbose, + ) + # we explain the full sample using the model fitted on the full sample # so the result 
is a list with a single data frame of shap values return self._calculate_shap(sample=sample, explainer=explainer) From e52b5b4c3d9b52991f045b735dd812bad969f852 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 11:53:46 +0200 Subject: [PATCH 030/106] API: make arg max_job_size required in ExplainerQueue --- src/facet/inspection/_explainer.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/facet/inspection/_explainer.py b/src/facet/inspection/_explainer.py index af444f8c..28f1c971 100644 --- a/src/facet/inspection/_explainer.py +++ b/src/facet/inspection/_explainer.py @@ -244,8 +244,6 @@ class ExplainerQueue( #: additional arguments specific to the explainer method kwargs: Dict[str, Any] - _DEFAULT_MAX_JOB_SIZE = 10 - # noinspection PyPep8Naming def __init__( self, @@ -253,7 +251,7 @@ def __init__( X: Union[np.ndarray, pd.DataFrame], y: Union[None, np.ndarray, pd.Series] = None, *, - max_job_size: int = _DEFAULT_MAX_JOB_SIZE, + max_job_size: int, **kwargs: Any, ) -> None: """ @@ -313,14 +311,15 @@ class ParallelExplainer(BaseExplainer, ParallelizableMixin): #: The explainer being parallelized by this wrapper explainer: BaseExplainer - # noinspection PyProtectedMember - _DEFAULT_MAX_JOB_SIZE = ExplainerQueue._DEFAULT_MAX_JOB_SIZE + #: the maximum number of observations to allocate to any of the explainer jobs + #: running in parallel + max_job_size: int def __init__( self, explainer: BaseExplainer, *, - max_job_size: int = _DEFAULT_MAX_JOB_SIZE, + max_job_size: int = 10, n_jobs: int, shared_memory: Optional[bool] = None, pre_dispatch: Optional[Union[str, int]] = None, From 4875d658524023e1a9a9144950fa42f30ad5aad9 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 12:39:43 +0200 Subject: [PATCH 031/106] REFACTOR: simplify access of BaseExplainer.shap_interaction_values() --- src/facet/inspection/_shap.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index cca53270..4add6a88 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -460,23 +460,14 @@ def _calculate_shap( ) -> pd.DataFrame: x = self._preprocess_features(sample=sample) - # calculate the im values (returned as an array) - try: - # noinspection PyUnresolvedReferences - shap_interaction_values_fn = explainer.shap_interaction_values - except AttributeError: - raise RuntimeError( - "Explainer does not implement method shap_interaction_values" - ) - multi_output_type = self.get_multi_output_type() multi_output_names = self.get_multi_output_names(sample) features_out = self.feature_index_ # calculate the shap interaction values; ensure the result is a list of arrays shap_interaction_tensors: List[np.ndarray] = self._convert_shap_tensors_to_list( - shap_tensors=shap_interaction_values_fn(x), - multi_output_names=multi_output_names, + shap_tensors=explainer.shap_interaction_values(x), + n_outputs=len(multi_output_names), ) interaction_matrix_per_output: List[pd.DataFrame] = [ From 09dfd13226d2f22a82b1f82c8670abdf908450aa Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 18 Oct 2021 12:41:42 +0200 Subject: [PATCH 032/106] REFACTOR: push binary classif. 
handling down to ClassifierShapCalculator --- src/facet/inspection/_shap.py | 40 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 4add6a88..14db2154 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -260,41 +260,28 @@ def _calculate_shap( pass def _convert_shap_tensors_to_list( - self, - *, - shap_tensors: Union[np.ndarray, Sequence[np.ndarray]], - multi_output_names: Sequence[str], + self, *, shap_tensors: Union[np.ndarray, List[np.ndarray]], n_outputs: int ): def _validate_shap_tensor(_t: np.ndarray) -> None: if np.isnan(np.sum(_t)): raise AssertionError( - "Output of SHAP explainer included NaN values. " + "Output of SHAP explainer includes NaN values. " "This should not happen; consider initialising the " "LearnerInspector with an ExplainerFactory that has a different " "configuration, or that makes SHAP explainers of a different type." ) - n_outputs = len(multi_output_names) - if isinstance(shap_tensors, List): for shap_tensor in shap_tensors: _validate_shap_tensor(shap_tensor) else: _validate_shap_tensor(shap_tensors) - if n_outputs == 2 and isinstance(self, ClassifierShapCalculator): - # if we have a single output *and* binary classification, the explainer - # will have returned a single tensor for the positive class; - # the SHAP values for the negative class will have the opposite sign - shap_tensors = [-shap_tensors, shap_tensors] - else: - # if we have a single output *and* no classification, the explainer will - # have returned a single tensor as an array, so we wrap it in a list - shap_tensors = [shap_tensors] + shap_tensors = [shap_tensors] if n_outputs != len(shap_tensors): raise AssertionError( f"count of SHAP tensors (n={len(shap_tensors)}) " - f"should match number of outputs ({multi_output_names})" + f"should match number of outputs (n={n_outputs})" ) return shap_tensors @@ -382,7 +369,7 @@ def _calculate_shap( # calculate the shap values, and ensure the result is a list of arrays shap_values: List[np.ndarray] = self._convert_shap_tensors_to_list( - shap_tensors=explainer.shap_values(x), multi_output_names=multi_output_names + shap_tensors=explainer.shap_values(x), n_outputs=len(multi_output_names) ) # convert to a data frame per output (different logic depending on whether @@ -579,6 +566,23 @@ class ClassifierShapCalculator(ShapCalculator[ClassifierPipelineDF], metaclass=A COL_CLASS = "class" + def _convert_shap_tensors_to_list( + self, *, shap_tensors: Union[np.ndarray, List[np.ndarray]], n_outputs: int + ): + + if n_outputs == 2 and isinstance(shap_tensors, np.ndarray): + # if we have a single output *and* binary classification, the explainer + # will have returned a single tensor for the positive class; + # the SHAP values for the negative class will have the opposite sign + (shap_tensors,) = super()._convert_shap_tensors_to_list( + shap_tensors=shap_tensors, n_outputs=1 + ) + return [-shap_tensors, shap_tensors] + else: + return super()._convert_shap_tensors_to_list( + shap_tensors=shap_tensors, n_outputs=n_outputs + ) + def _get_output_names( self, sample: Sample, From b0a60bb336ee8f0992e724d23a7eecabb229bbb5 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 07:18:46 +0200 Subject: [PATCH 033/106] API: run simulations on full sample instead of crossfit splits --- src/facet/simulation/_simulation.py | 293 ++++++---------------------- src/facet/simulation/viz/_draw.py | 2 +- test/test/conftest.py | 7 + 
test/test/facet/test_simulation.py | 251 +++++++----------------- 4 files changed, 140 insertions(+), 413 deletions(-) diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py index 6c46f47a..cfc25483 100644 --- a/src/facet/simulation/_simulation.py +++ b/src/facet/simulation/_simulation.py @@ -8,7 +8,6 @@ Any, Callable, Generic, - Iterator, List, Optional, Sequence, @@ -23,7 +22,7 @@ from scipy import stats from pytools.api import AllTracker, inheritdoc -from pytools.parallelization import Job, JobRunner, ParallelizableMixin +from pytools.parallelization import ParallelizableMixin from sklearndf import LearnerDF from sklearndf.pipeline import ( ClassifierPipelineDF, @@ -31,10 +30,8 @@ RegressorPipelineDF, ) -from ..crossfit import LearnerCrossfit from ..data import Sample from ..data.partition import Partitioner -from ..validation import BaseBootstrapCV log = logging.getLogger(__name__) @@ -95,8 +92,8 @@ class UnivariateSimulationResult(Generic[T_Partition]): #: and rows representing bootstrap splits used to fit variations of the model. outputs: pd.DataFrame - #: The name of a series of median simulated values per partition. - COL_MEDIAN = "median" + #: The name of a series of mean simulated values per partition. + COL_MEAN = "mean" #: The name of a series of lower CI bounds of simulated values per partition. COL_LOWER_BOUND = "lower_bound" @@ -132,16 +129,8 @@ def __init__( """ super().__init__() - assert outputs.index.name in [ - BaseUnivariateSimulator.IDX_SPLIT, - # for the experimental _full sample_ feature, we also accept "metric" as - # the name of the row index - "metric", - ], f"row index of arg outputs is named {BaseUnivariateSimulator.IDX_SPLIT}" - assert outputs.columns.name == BaseUnivariateSimulator.IDX_PARTITION, ( - "column index of arg outputs is named " - f"{BaseUnivariateSimulator.IDX_PARTITION}" - ) + assert outputs.index.name == "metric" + assert outputs.columns.name == BaseUnivariateSimulator.IDX_PARTITION assert ( 0.0 < confidence_level < 1.0 ), f"confidence_level={confidence_level} ranges between 0.0 and 1.0 (exclusive)" @@ -154,43 +143,33 @@ def __init__( self.partitioner = partitioner self.outputs = outputs - def outputs_median(self) -> pd.Series: + def outputs_mean(self) -> pd.Series: """ - Calculate the medians of the distribution of simulation outcomes, - for every partition. + Calculate the means of simulation outcomes for every partition. - :return: a series of medians, indexed by the central values of the partitions + :return: a series of means, indexed by the central values of the partitions for which the simulation was run """ - if self._full_sample: - # experimental feature: we only simulated using one model fit on the full - # sample; return the mean outputs for each partition without aggregating - # further - values = self.outputs.loc["mean"] - else: - values = self.outputs.median() - return values.rename(UnivariateSimulationResult.COL_MEDIAN) + return self.outputs.loc["mean"].rename(UnivariateSimulationResult.COL_MEAN) def outputs_lower_bound(self) -> pd.Series: """ Calculate the lower CI bounds of the distribution of simulation outcomes, for every partition. 
- :return: a series of medians, indexed by the central values of the partitions - for which the simulation was run + :return: a series of lower CI bounds, indexed by the central values of the + partitions for which the simulation was run """ - if self._full_sample: - # experimental feature: we only simulated using one model fit on the full - # sample; return the mean outputs for each partition without aggregating - # further, and determine the lower confidence bound based on the standard - # error of the mean and the desired confidence level - values = ( - self.outputs.loc["mean"] - + stats.norm.ppf((1.0 - self.confidence_level) / 2.0) - * self.outputs.loc["sem"] - ) - else: - values = self.outputs.quantile(q=(1.0 - self.confidence_level) / 2.0) + # return the mean outputs for each partition without aggregating + # further, and determine the lower confidence bound based on the standard + # error of the mean and the desired confidence level + + values = ( + self.outputs.loc["mean"] + + stats.norm.ppf((1.0 - self.confidence_level) / 2.0) + * self.outputs.loc["sem"] + ) + return values.rename(UnivariateSimulationResult.COL_LOWER_BOUND) def outputs_upper_bound(self) -> pd.Series: @@ -198,28 +177,19 @@ def outputs_upper_bound(self) -> pd.Series: Calculate the lower CI bounds of the distribution of simulation outcomes, for every partition. - :return: a series of medians, indexed by the central values of the partitions - for which the simulation was run + :return: a series of upper CI bounds, indexed by the central values of the + partitions for which the simulation was run """ - if self._full_sample: - # experimental feature: we only simulated using one model fit on the full - # sample; return the mean outputs for each partition without aggregating - # further, and determine the upper confidence bound based on the standard - # error of the mean and the desired confidence level - values = ( - self.outputs.loc["mean"] - - stats.norm.ppf((1.0 - self.confidence_level) / 2.0) - * self.outputs.loc["sem"] - ) - else: - values = self.outputs.quantile(q=1.0 - (1.0 - self.confidence_level) / 2.0) - return values.rename(UnivariateSimulationResult.COL_UPPER_BOUND) + # return the mean outputs for each partition without aggregating + # further, and determine the upper confidence bound based on the standard + # error of the mean and the desired confidence level + values = ( + self.outputs.loc["mean"] + - stats.norm.ppf((1.0 - self.confidence_level) / 2.0) + * self.outputs.loc["sem"] + ) - @property - def _full_sample(self) -> bool: - # experimental _full sample_ feature is active iff the name of the row index - # is "metric" - return self.outputs.index.name == "metric" + return values.rename(UnivariateSimulationResult.COL_UPPER_BOUND) class BaseUnivariateSimulator( @@ -229,9 +199,6 @@ class BaseUnivariateSimulator( Base class for univariate simulations. """ - #: The name of the row index of attribute :attr:`.output`, denoting splits. - IDX_SPLIT = "split" - #: The name of the column index of attribute :attr:`.output`, denoting partitions #: represented by their central values or by a category. IDX_PARTITION = "partition" @@ -239,8 +206,8 @@ class BaseUnivariateSimulator( #: The name of a series of simulated outputs. 
COL_OUTPUT = "output" - #: The crossfit used to conduct simulations - crossfit: LearnerCrossfit[T_LearnerPipelineDF] + #: The learner pipeline used to conduct simulations + model: T_LearnerPipelineDF #: The sample used in baseline calculations and simulations; this is the full sample #: from the :attr:`.crossfit`, or a subsample thereof @@ -252,9 +219,9 @@ class BaseUnivariateSimulator( def __init__( self, - crossfit: LearnerCrossfit[T_LearnerPipelineDF], + model: T_LearnerPipelineDF, + sample: Sample, *, - subsample: Optional[pd.Index] = None, confidence_level: float = 0.95, n_jobs: Optional[int] = None, shared_memory: Optional[bool] = None, @@ -262,15 +229,10 @@ def __init__( verbose: Optional[int] = None, ) -> None: """ - :param crossfit: cross-validated crossfit of a model for all observations - in a given sample - :param subsample: an optional index referencing a subset of the training sample - to be used in baseline calculations and simulations + :param model: a fitted learner to use for calculating simulated outputs + :param sample: the sample to be used for baseline calculations and simulations :param confidence_level: the width :math:`\\alpha` of the confidence interval - determined by bootstrapping, with :math:`0 < \\alpha < 1`; - for reliable CI estimates the number of splits in the crossfit should be - at least :math:`n = \\frac{50}{1 - \\alpha}`, e.g. :math:`n = 1000` for - :math:`\\alpha = 0.95` + to be estimated for simulation results """ super().__init__( n_jobs=n_jobs, @@ -279,16 +241,16 @@ def __init__( verbose=verbose, ) - if not isinstance(crossfit.pipeline, self._expected_pipeline_type()): + if not isinstance(model, self._expected_pipeline_type()): raise TypeError( "arg crossfit must fit a pipeline of type " f"{self._expected_pipeline_type().__name__}." 
) - if not crossfit.is_fitted: - raise ValueError("arg crossfit expected to be fitted") + if not model.is_fitted: + raise ValueError("arg model must be fitted") - if isinstance(crossfit.sample_.target_name, list): + if isinstance(sample.target_name, list): raise NotImplementedError("multi-output simulations are not supported") if not 0.0 < confidence_level < 1.0: @@ -297,32 +259,7 @@ def __init__( "must range between 0.0 and 1.0 (exclusive)" ) - if not isinstance(crossfit.cv, BaseBootstrapCV): - log.warning( - "arg crossfit.cv should be a bootstrap cross-validator " - f"but is a {type(crossfit.cv).__name__}" - ) - - min_splits = int(50 / (1.0 - confidence_level)) - if len(crossfit) < min_splits: - log.warning( - f"at least {min_splits} bootstrap splits are recommended for " - f"reliable results with arg confidence_level={confidence_level}, " - f"but arg crossfit.cv has only {len(crossfit)} splits" - ) - - sample = crossfit.sample_ - - if subsample is not None: - unknown_observations = subsample.difference(sample.index) - if len(unknown_observations) > 0: - raise ValueError( - "arg subsample includes indices not contained " - f"in the simulation sample: {unknown_observations.to_list()}" - ) - sample = sample.subsample(loc=subsample) - - self.crossfit = crossfit + self.model = model self.sample = sample self.confidence_level = confidence_level @@ -365,34 +302,6 @@ def simulate_feature( ), ) - def simulate_actuals(self) -> pd.Series: - r""" - For each test split :math:`\mathrm{T}_i` in this simulator's - crossfit, predict the outputs for all test samples given their actual - feature values, and calculate the absolute deviation from the mean of all actual - outputs of the entire sample - :math:`\frac{1}{n}\sum_{j \in \mathrm{T}_i}\hat y_j - \bar y`. - - The spread and offset of these deviations can serve as an indication of how the - bias of the model contributes to the uncertainty of simulations produced with - method :meth:`.simulate_feature`. 
- - :return: series mapping split IDs to deviations of simulated mean outputs - """ - - y_mean = self.expected_output() - - result: List[float] = JobRunner.from_parallelizable(self).run_jobs( - Job.delayed(self._simulate_actuals)( - model, subsample.features, y_mean, self._simulate - ) - for model, subsample in self._get_simulations() - ) - - return pd.Series( - data=result, name=BaseUnivariateSimulator.COL_OUTPUT - ).rename_axis(index=BaseUnivariateSimulator.IDX_SPLIT) - @property @abstractmethod def output_unit(self) -> str: @@ -448,97 +357,34 @@ def _simulate_feature_with_values( if feature_name not in self.sample.features.columns: raise ValueError(f"feature not in sample: {feature_name}") + model = self.model + sample = self.sample # for each split, calculate the mean simulation outputs and the standard error # of each mean - simulation_means_and_sems_per_split: List[ - Tuple[Sequence[float], Sequence[float]] - ] = JobRunner.from_parallelizable(self).run_jobs( - Job.delayed(UnivariateUpliftSimulator._simulate_values_for_split)( - model=model, - subsample=subsample, - feature_name=feature_name, - simulated_values=simulation_values, - simulate_fn=self._simulate, - ) - for (model, subsample) in self._get_simulations() + mean: Sequence[float] + sem: Sequence[float] + mean, sem = UnivariateUpliftSimulator._simulate_values_for_split( + model=model, + subsample=sample, + feature_name=feature_name, + simulated_values=simulation_values, + simulate_fn=self._simulate, ) index_name: str index: Optional[List[str]] simulation_results_per_split: List[List[float]] - if self._full_sample: - # experimental "full sample" feature: we only worked with one split - # (which is the full sample); for that split we preserve the means and - # standard errors of the means for each partition - assert len(simulation_means_and_sems_per_split) == 1 - simulation_results_per_split = [ - # convert mean and sem tuple to a list - list(seq_result) - for seq_result in simulation_means_and_sems_per_split[0] - ] - index_name = "metric" - index = ["mean", "sem"] - else: - # existing approach: only keep the means for each split - simulation_results_per_split = [ - list(seq_mean) for seq_mean, _ in simulation_means_and_sems_per_split - ] - index_name = BaseUnivariateSimulator.IDX_SPLIT - index = None + index_name = "metric" + index = ["mean", "sem"] return pd.DataFrame( - simulation_results_per_split, columns=simulation_values, index=index + [mean, sem], columns=simulation_values, index=index ).rename_axis( index=index_name, columns=BaseUnivariateSimulator.IDX_PARTITION, ) - def _get_simulations(self) -> Iterator[Tuple[T_LearnerPipelineDF, Sample]]: - sample = self.sample - # we don't need duplicate indices to calculate the intersection - # with the samples of the test split, so we drop them - sample_index = sample.index.unique() - - if self._full_sample: - # experimental flag: if `True`, simulate on full sample using all data - xf_sample: Sample = self.crossfit.sample_ - return iter( - ( - ( - self.crossfit.pipeline.clone().fit( - X=xf_sample.features, - y=xf_sample.target, - sample_weight=xf_sample.weight, - ), - sample, - ), - ) - ) - - xf_sample_index = self.crossfit.sample_.index - return ( - (model, subsample) - for model, subsample in zip( - self.crossfit.models(), - ( - ( - sample.subsample( - loc=sample_index.intersection(xf_sample_index[test_indices]) - ) - ) - for _, test_indices in self.crossfit.splits() - ), - ) - if len(subsample) - ) - - @property - def _full_sample(self) -> Sample: - # experimental flag: if 
`True`, simulate on full sample using all data - full_sample = getattr(self, "full_sample", False) - return full_sample - @staticmethod def _simulate_values_for_split( model: LearnerDF, @@ -575,15 +421,6 @@ def _simulate_values_for_split( outputs_mean, outputs_sem = zip(*outputs_mean_sem) return outputs_mean, outputs_sem - @staticmethod - def _simulate_actuals( - model: LearnerDF, - x: pd.DataFrame, - y_mean: float, - simulate_fn: Callable[[LearnerDF, pd.DataFrame], pd.Series], - ) -> float: - return simulate_fn(model, x).mean() - y_mean - @inheritdoc(match="[see superclass]") class UnivariateProbabilitySimulator(BaseUnivariateSimulator[ClassifierPipelineDF]): @@ -632,7 +469,7 @@ def _positive_class(self) -> Any: """ The label of the positive class of the binary classifier being simulated. """ - classifier = self.crossfit.pipeline.final_estimator + classifier = self.model.final_estimator try: return classifier.classes_[-1] @@ -704,7 +541,7 @@ class UnivariateTargetSimulator(_UnivariateRegressionSimulator): @property def output_unit(self) -> str: """[see superclass]""" - return f"Mean predicted target ({self.crossfit.sample_.target_name})" + return f"Mean predicted target ({self.sample.target_name})" @inheritdoc(match="[see superclass]") @@ -735,7 +572,7 @@ class UnivariateUpliftSimulator(_UnivariateRegressionSimulator): @property def output_unit(self) -> str: """[see superclass]""" - return f"Mean predicted uplift ({self.crossfit.sample_.target_name})" + return f"Mean predicted uplift ({self.sample.target_name})" def baseline(self) -> float: """ @@ -755,12 +592,10 @@ def simulate_feature( result = super().simulate_feature( feature_name=feature_name, partitioner=partitioner ) - if self._full_sample: - # we only offset the mean values, but not the standard errors of the means - # (which are relative values already so don't need to be offset) - result.outputs.loc["mean"] -= self.expected_output() - else: - result.outputs -= self.expected_output() + # we only offset the mean values, but not the standard errors of the means + # (which are relative values already so don't need to be offset) + result.outputs.loc["mean"] -= self.expected_output() + return result diff --git a/src/facet/simulation/viz/_draw.py b/src/facet/simulation/viz/_draw.py index 36f28c6d..f082f306 100644 --- a/src/facet/simulation/viz/_draw.py +++ b/src/facet/simulation/viz/_draw.py @@ -90,7 +90,7 @@ def _draw(self, data: UnivariateSimulationResult) -> None: Sequence[Any], Sequence[int], ] = ( - data.outputs_median().to_list(), + data.outputs_mean().to_list(), data.outputs_lower_bound().to_list(), data.outputs_upper_bound().to_list(), data.partitioner.partitions_, diff --git a/test/test/conftest.py b/test/test/conftest.py index bc584ab6..01f166fe 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -381,6 +381,13 @@ def iris_classifier_ranker_dual_target( ) +@pytest.fixture +def iris_classifier_model_binary( + iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], +) -> ClassifierPipelineDF[RandomForestClassifierDF]: + return iris_classifier_ranker_binary.best_model_ + + @pytest.fixture def iris_classifier_crossfit_binary( iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py index c7380275..083c2dd1 100644 --- a/test/test/facet/test_simulation.py +++ b/test/test/facet/test_simulation.py @@ -11,7 +11,6 @@ from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from 
sklearndf.regression.extra import LGBMRegressorDF -from facet.crossfit import LearnerCrossfit from facet.data import Sample from facet.data.partition import ContinuousRangePartitioner from facet.simulation import ( @@ -21,7 +20,6 @@ UnivariateUpliftSimulator, ) from facet.simulation.viz import SimulationDrawer -from facet.validation import StationaryBootstrapCV log = logging.getLogger(__name__) @@ -29,52 +27,42 @@ @pytest.fixture -def crossfit( - sample: Sample, simple_preprocessor: TransformerDF, n_jobs: int -) -> LearnerCrossfit: +def model(sample: Sample, simple_preprocessor: TransformerDF) -> RegressorPipelineDF: # use a pre-optimised model - return LearnerCrossfit( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, - regressor=LGBMRegressorDF( - max_depth=10, min_split_gain=0.2, num_leaves=50, random_state=42 - ), + return RegressorPipelineDF( + preprocessing=simple_preprocessor, + regressor=LGBMRegressorDF( + max_depth=10, min_split_gain=0.2, num_leaves=50, random_state=42 ), - cv=StationaryBootstrapCV(n_splits=N_SPLITS, random_state=42), - n_jobs=n_jobs, - ).fit(sample=sample) + ).fit(X=sample.features, y=sample.target) @pytest.fixture -def subsample() -> pd.Index: - return pd.Index( - [8, 77, 65, 43, 43, 85, 8, 69, 20, 9, 52, 97, 73, 76, 71, 78] - + [51, 12, 83, 45, 50, 37, 18, 92, 78, 64, 40, 82, 54, 44, 45, 22] - + [9, 55, 88, 6, 85, 82, 27, 63, 16, 75, 70, 35, 6, 97, 44, 89, 67, 77] +def subsample(sample: Sample) -> Sample: + return sample.subsample( + iloc=( + [8, 77, 65, 43, 43, 85, 8, 69, 20, 9, 52, 97, 73, 76, 71, 78] + + [51, 12, 83, 45, 50, 37, 18, 92, 78, 64, 40, 82, 54, 44, 45, 22] + + [9, 55, 88, 6, 85, 82, 27, 63, 16, 75, 70, 35, 6, 97, 44, 89, 67, 77] + ) ) @pytest.fixture def target_simulator( - crossfit: LearnerCrossfit, n_jobs: int + model: RegressorPipelineDF, sample: Sample, n_jobs: int ) -> UnivariateTargetSimulator: return UnivariateTargetSimulator( - crossfit=crossfit, - confidence_level=0.8, - n_jobs=n_jobs, - verbose=50, + model=model, sample=sample, confidence_level=0.8, n_jobs=n_jobs, verbose=50 ) @pytest.fixture def uplift_simulator( - crossfit: LearnerCrossfit, n_jobs: int + model: RegressorPipelineDF, sample: Sample, n_jobs: int ) -> UnivariateUpliftSimulator: return UnivariateUpliftSimulator( - crossfit=crossfit, - confidence_level=0.8, - n_jobs=n_jobs, - verbose=50, + model=model, sample=sample, confidence_level=0.8, n_jobs=n_jobs, verbose=50 ) @@ -90,16 +78,7 @@ def test_univariate_target_simulation( partitioner=partitioner, ) - values = simulation_result.outputs.values - - # test aggregated values - # the values on the right were computed from correct runs - assert values.min() == approx(18.47276) - assert values.mean() == approx(22.63754) - assert values.max() == approx(28.47179) - - # test the first five rows of aggregated_results - # the values were computed from a correct run + # test simulation results index = pd.Index( data=[0.0, 5.0, 10.0, 15.0, 20.0, 25.0], @@ -109,17 +88,17 @@ def test_univariate_target_simulation( assert_series_equal( simulation_result.outputs_lower_bound(), pd.Series( - [22.431173, 22.431173, 19.789556, 18.853876, 18.853876, 18.853876], + [24.98646, 24.98646, 21.15398, 20.23877, 20.23877, 20.23877], name=UnivariateSimulationResult.COL_LOWER_BOUND, index=index, ), ) assert_series_equal( - simulation_result.outputs_median(), + simulation_result.outputs_mean(), pd.Series( - [25.782475, 25.782475, 22.310836, 21.302304, 21.011027, 21.011027], - name=UnivariateSimulationResult.COL_MEDIAN, + [25.4571, 25.4571, 
21.67744, 20.81063, 20.81063, 20.81063], + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -127,7 +106,7 @@ def test_univariate_target_simulation( assert_series_equal( simulation_result.outputs_upper_bound(), pd.Series( - [27.750435, 27.750435, 23.621475, 23.031676, 22.906156, 22.906156], + [25.92774, 25.92774, 22.2009, 21.38249, 21.38249, 21.38249], name=UnivariateSimulationResult.COL_UPPER_BOUND, index=index, ), @@ -144,36 +123,15 @@ def test_univariate_target_simulation( ) -def test_univariate_target_subsample_simulation( - crossfit: LearnerCrossfit, subsample: pd.Index, n_jobs: int +def test_univariate_target_subsample_simulation_80( + model: RegressorPipelineDF, subsample: Sample, n_jobs: int ) -> None: parameterized_feature = "LSTAT" partitioner = ContinuousRangePartitioner(max_partitions=10) - sample_index = crossfit.sample_.index - - with pytest.raises( - ValueError, - match=( - "arg subsample includes indices not contained in the simulation sample: " - r"\[-1, 9999\]" - ), - ): - UnivariateTargetSimulator( - crossfit=crossfit, - subsample=pd.Index([*sample_index, -1, 9999]), - ).simulate_feature( - feature_name=parameterized_feature, - partitioner=partitioner, - ) - target_simulator = UnivariateTargetSimulator( - crossfit=crossfit, - subsample=subsample, - confidence_level=0.8, - n_jobs=n_jobs, - verbose=50, + model=model, sample=subsample, confidence_level=0.8, n_jobs=n_jobs, verbose=50 ) simulation_result: UnivariateSimulationResult = target_simulator.simulate_feature( @@ -181,16 +139,7 @@ def test_univariate_target_subsample_simulation( partitioner=partitioner, ) - values = simulation_result.outputs.values - - # test aggregated values - # the values on the right were computed from correct runs - assert values.min() == approx(17.92365) - assert values.mean() == approx(23.30506) - assert values.max() == approx(28.60988) - - # test the first five rows of aggregated_results - # the values were computed from a correct run + # test simulation results index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], @@ -200,19 +149,19 @@ def test_univariate_target_subsample_simulation( assert_series_equal( simulation_result.outputs_lower_bound(), pd.Series( - [22.233849, 22.233849, 22.233849, 20.942154, 19.444643] - + [19.363522, 18.300420, 18.300420, 18.300420], + [25.05676, 25.05676, 25.05676, 22.96243, 21.43395] + + [21.21544, 20.76824, 20.49282, 20.49282], name=UnivariateSimulationResult.COL_LOWER_BOUND, index=index, ), ) assert_series_equal( - simulation_result.outputs_median(), + simulation_result.outputs_mean(), pd.Series( - [25.913666, 25.913666, 25.913666, 24.445583, 22.575495] - + [22.403473, 22.288344, 21.642255, 21.430772], - name=UnivariateSimulationResult.COL_MEDIAN, + [25.642227, 25.642227, 25.642227, 23.598706, 22.067057] + + [21.864828, 21.451056, 21.195954, 21.195954], + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -220,8 +169,8 @@ def test_univariate_target_subsample_simulation( assert_series_equal( simulation_result.outputs_upper_bound(), pd.Series( - [28.230187, 28.230187, 28.230187, 25.805393, 24.296859] - + [24.221809, 24.174851, 23.640126, 23.640126], + [26.22769, 26.22769, 26.22769, 24.23498, 22.70016] + + [22.51422, 22.13387, 21.89909, 21.89909], name=UnivariateSimulationResult.COL_UPPER_BOUND, index=index, ), @@ -238,30 +187,23 @@ def test_univariate_target_subsample_simulation( ) -def test_univariate_uplift_subsample_simulation_full_sample( - crossfit: LearnerCrossfit, subsample: pd.Index, n_jobs: int +def 
test_univariate_uplift_subsample_simulation_95( + model: RegressorPipelineDF, subsample: Sample, n_jobs: int ) -> None: parameterized_feature = "LSTAT" partitioner = ContinuousRangePartitioner(max_partitions=10) target_simulator = UnivariateUpliftSimulator( - crossfit=crossfit, - subsample=subsample, - confidence_level=0.95, - n_jobs=n_jobs, - verbose=50, + model=model, sample=subsample, confidence_level=0.95, n_jobs=n_jobs, verbose=50 ) - target_simulator.full_sample = True - simulation_result: UnivariateSimulationResult = target_simulator.simulate_feature( feature_name=parameterized_feature, partitioner=partitioner, ) - # test the first five rows of aggregated_results - # the values were computed from a correct run + # test simulation results index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], @@ -279,11 +221,11 @@ def test_univariate_uplift_subsample_simulation_full_sample( ) assert_series_equal( - simulation_result.outputs_median().round(6), + simulation_result.outputs_mean().round(6), pd.Series( [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + [-1.081172, -1.494944, -1.750046, -1.750046], - name=UnivariateSimulationResult.COL_MEDIAN, + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -309,21 +251,6 @@ def test_univariate_uplift_subsample_simulation_full_sample( ) -def test_actuals_simulation(uplift_simulator: UnivariateUpliftSimulator) -> None: - - assert_series_equal( - uplift_simulator.simulate_actuals(), - pd.Series( - index=pd.RangeIndex(10, name=UnivariateUpliftSimulator.IDX_SPLIT), - data=( - [3.207810, 1.807740, 0.709917, -2.392966, 1.530005] - + [-2.394199, 1.389225, -3.261376, 2.248752, 1.226377] - ), - name=UnivariateUpliftSimulator.COL_OUTPUT, - ), - ) - - def test_univariate_uplift_simulation( uplift_simulator: UnivariateUpliftSimulator, ) -> None: @@ -336,18 +263,7 @@ def test_univariate_uplift_simulation( partitioner=partitioner, ) - absolute_target_change_df: pd.DataFrame = simulation_result.outputs - - values = absolute_target_change_df.values - - # test aggregated values - # the values on the right were computed from correct runs - assert values.min() == approx(-3.83624) - assert values.mean() == approx(0.3285436) - assert values.max() == approx(6.16279) - - # test the first five rows of aggregated_results - # the values were computed from a correct run + # test simulation results index = pd.Index( data=[0.0, 5.0, 10.0, 15.0, 20.0, 25.0], @@ -357,17 +273,17 @@ def test_univariate_uplift_simulation( assert_series_equal( simulation_result.outputs_lower_bound(), pd.Series( - [0.122173, 0.122173, -2.519444, -3.455124, -3.455124, -3.455124], + [2.677461, 2.677461, -1.155017, -2.070234, -2.070234, -2.070234], name=UnivariateSimulationResult.COL_LOWER_BOUND, index=index, ), ) assert_series_equal( - simulation_result.outputs_median(), + simulation_result.outputs_mean(), pd.Series( - [3.473475, 3.473475, 0.00183626, -1.006696, -1.297973, -1.297973], - name=UnivariateSimulationResult.COL_MEDIAN, + [3.148100, 3.148100, -0.631560, -1.498371, -1.498371, -1.498371], + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -375,7 +291,7 @@ def test_univariate_uplift_simulation( assert_series_equal( simulation_result.outputs_upper_bound(), pd.Series( - [5.441435, 5.441435, 1.312475, 0.722676, 0.597156, 0.597156], + [3.618739, 3.618739, -0.108103, -0.926508, -0.926508, -0.926508], name=UnivariateSimulationResult.COL_UPPER_BOUND, index=index, ), @@ -393,52 +309,21 @@ def test_univariate_uplift_simulation( def 
test_univariate_uplift_subsample_simulation( - crossfit: LearnerCrossfit, subsample: pd.Index, n_jobs: int + model: RegressorPipelineDF, subsample: Sample, n_jobs: int ) -> None: parameterized_feature = "LSTAT" partitioner = ContinuousRangePartitioner(max_partitions=10) - sample_index = crossfit.sample_.index - - with pytest.raises( - ValueError, - match=( - "arg subsample includes indices not contained in the simulation sample: " - r"\[-1, 9999\]" - ), - ): - UnivariateUpliftSimulator( - crossfit=crossfit, subsample=pd.Index([*sample_index, -1, 9999]) - ).simulate_feature( - feature_name=parameterized_feature, - partitioner=partitioner, - ) - uplift_simulator = UnivariateUpliftSimulator( - crossfit=crossfit, - subsample=subsample, - confidence_level=0.8, - n_jobs=n_jobs, - verbose=50, + model=model, sample=subsample, confidence_level=0.8, n_jobs=n_jobs, verbose=50 ) simulation_result: UnivariateSimulationResult = uplift_simulator.simulate_feature( feature_name=parameterized_feature, partitioner=partitioner ) - absolute_target_change_df: pd.DataFrame = simulation_result.outputs - - values = absolute_target_change_df.values - - # test aggregated values - # the values on the right were computed from correct runs - assert values.min() == approx(-5.02235) - assert values.mean() == approx(0.359062) - assert values.max() == approx(5.66388) - - # test the first five rows of aggregated_results - # the values were computed from a correct run + # test simulation results index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], @@ -448,19 +333,19 @@ def test_univariate_uplift_subsample_simulation( assert_series_equal( simulation_result.outputs_lower_bound(), pd.Series( - [-0.712151, -0.712151, -0.712151, -2.003846, -3.501357] - + [-3.582478, -4.64558, -4.64558, -4.64558], + [2.110762, 2.110762, 2.110762, 0.0164306, -1.512048] + + [-1.730561, -2.177757, -2.453179, -2.453179], name=UnivariateSimulationResult.COL_LOWER_BOUND, index=index, ), ) assert_series_equal( - simulation_result.outputs_median(), + simulation_result.outputs_mean(), pd.Series( - [2.967666, 2.967666, 2.967666, 1.499583, -0.370505] - + [-0.542527, -0.657656, -1.303745, -1.515228], - name=UnivariateSimulationResult.COL_MEDIAN, + [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + + [-1.081172, -1.494944, -1.750046, -1.750046], + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -468,8 +353,8 @@ def test_univariate_uplift_subsample_simulation( assert_series_equal( simulation_result.outputs_upper_bound(), pd.Series( - [5.284187, 5.284187, 5.284187, 2.859393, 1.350859] - + [1.275809, 1.228851, 0.694126, 0.694126], + [3.281693, 3.281693, 3.281693, 1.288981, -0.245838] + + [-0.431783, -0.81213, -1.046914, -1.046914], name=UnivariateSimulationResult.COL_UPPER_BOUND, index=index, ), @@ -483,18 +368,18 @@ def test_univariate_uplift_subsample_simulation( def test_univariate_probability_simulation( - iris_classifier_crossfit_binary: LearnerCrossfit[ - ClassifierPipelineDF[RandomForestClassifierDF] - ], + iris_classifier_model_binary: ClassifierPipelineDF[RandomForestClassifierDF], + iris_sample_binary: Sample, n_jobs: int, ) -> None: parameterized_feature = "sepal length (cm)" partitioner = ContinuousRangePartitioner(max_partitions=10) - print(iris_classifier_crossfit_binary.sample_.feature_names) + print(iris_sample_binary.feature_names) proba_simulator = UnivariateProbabilitySimulator( - crossfit=iris_classifier_crossfit_binary, + model=iris_classifier_model_binary, + sample=iris_sample_binary, 
confidence_level=0.95, n_jobs=n_jobs, verbose=50, @@ -513,17 +398,17 @@ def test_univariate_probability_simulation( assert_series_equal( simulation_result.outputs_lower_bound(), pd.Series( - [0.346255, 0.346255, 0.353697, 0.394167, 0.401895, 0.417372, 0.417372], + [0.415337, 0.390766, 0.401039, 0.420727, 0.425914, 0.452885, 0.452885], name=UnivariateSimulationResult.COL_LOWER_BOUND, index=index, ), ) assert_series_equal( - simulation_result.outputs_median(), + simulation_result.outputs_mean(), pd.Series( - [0.460432, 0.450516, 0.469412, 0.488569, 0.492651, 0.507788, 0.507788], - name=UnivariateSimulationResult.COL_MEDIAN, + [0.495814, 0.475288, 0.48689, 0.507294, 0.510055, 0.533888, 0.533888], + name=UnivariateSimulationResult.COL_MEAN, index=index, ), ) @@ -531,7 +416,7 @@ def test_univariate_probability_simulation( assert_series_equal( simulation_result.outputs_upper_bound(), pd.Series( - [0.582565, 0.562096, 0.570590, 0.580023, 0.599714, 0.602303, 0.602303], + [0.576292, 0.559809, 0.57274, 0.593862, 0.594196, 0.614892, 0.614892], name=UnivariateSimulationResult.COL_UPPER_BOUND, index=index, ), From bd0f0ef87e41a816a32f20c4c3b9c777930b7aa3 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 09:16:25 +0200 Subject: [PATCH 034/106] API: simplify UnivariateSimulationResult API; simplify simulator classes --- src/facet/simulation/_simulation.py | 288 +++++++++++++--------------- src/facet/simulation/viz/_draw.py | 6 +- test/test/facet/test_simulation.py | 48 ++--- 3 files changed, 155 insertions(+), 187 deletions(-) diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py index cfc25483..cecd908f 100644 --- a/src/facet/simulation/_simulation.py +++ b/src/facet/simulation/_simulation.py @@ -6,9 +6,8 @@ from abc import ABCMeta, abstractmethod from typing import ( Any, - Callable, Generic, - List, + Iterable, Optional, Sequence, Tuple, @@ -23,12 +22,7 @@ from pytools.api import AllTracker, inheritdoc from pytools.parallelization import ParallelizableMixin -from sklearndf import LearnerDF -from sklearndf.pipeline import ( - ClassifierPipelineDF, - LearnerPipelineDF, - RegressorPipelineDF, -) +from sklearndf import ClassifierDF, LearnerDF, RegressorDF from ..data import Sample from ..data.partition import Partitioner @@ -48,7 +42,7 @@ # Type variables # -T_LearnerPipelineDF = TypeVar("T_LearnerPipelineDF", bound=LearnerPipelineDF) +T_LearnerDF = TypeVar("T_LearnerDF", bound=LearnerDF) T_Partition = TypeVar("T_Partition") @@ -69,6 +63,24 @@ class UnivariateSimulationResult(Generic[T_Partition]): Summary result of a univariate simulation. """ + #: The partitioner used to generate feature values to be simulated. + partitioner: Partitioner + + #: The mean predictions for the values representing each partition. + mean: pd.Series + + #: The standard errors of the mean predictions for the values representing each + # partition. + sem: pd.Series + + #: The lower bounds of the confidence intervals for the mean predictions for the + # values representing each partition. + lower_bound: pd.Series + + #: The upper bounds of the confidence intervals for the mean predictions for the + # values representing each partition. + upper_bound: pd.Series + #: Name of the simulated feature. feature_name: str @@ -85,16 +97,16 @@ class UnivariateSimulationResult(Generic[T_Partition]): #: determined by bootstrapping, with :math:`0 < \alpha < 1`. confidence_level: float - #: The partitioner used to generate feature values to be simulated. 
- partitioner: Partitioner - - #: The matrix of simulated outcomes, with columns representing partitions - #: and rows representing bootstrap splits used to fit variations of the model. - outputs: pd.DataFrame + #: The name of the column index of attribute :attr:`.output`, denoting partitions + #: represented by their central values or by a category. + IDX_PARTITION = "partition" #: The name of a series of mean simulated values per partition. COL_MEAN = "mean" + #: The name of a series of standard errors of mean simulated values per partition. + COL_SEM = "sem" + #: The name of a series of lower CI bounds of simulated values per partition. COL_LOWER_BOUND = "lower_bound" @@ -104,15 +116,21 @@ class UnivariateSimulationResult(Generic[T_Partition]): def __init__( self, *, + partitioner: Partitioner, + mean: Sequence[float], + sem: Sequence[float], feature_name: str, output_name: str, output_unit: str, baseline: float, confidence_level: float, - partitioner: Partitioner, - outputs: pd.DataFrame, ) -> None: """ + :param partitioner: the partitioner used to generate feature values to be + simulated + :param mean: mean predictions for the values representing each partition + :param sem: standard errors of the mean predictions for the values representing + each partition :param feature_name: name of the simulated feature :param output_name: name of the target for which outputs are simulated :param output_unit: the unit of the simulated outputs @@ -121,38 +139,46 @@ def __init__( of the simulation :param confidence_level: the width of the confidence interval determined by bootstrapping, ranging between 0.0 and 1.0 (exclusive) - :param partitioner: the partitioner used to generate feature values to be - simulated - :param outputs: matrix of simulated outcomes, with columns representing - partitions and rows representing bootstrap splits used to fit variations - of the model """ super().__init__() - assert outputs.index.name == "metric" - assert outputs.columns.name == BaseUnivariateSimulator.IDX_PARTITION - assert ( - 0.0 < confidence_level < 1.0 - ), f"confidence_level={confidence_level} ranges between 0.0 and 1.0 (exclusive)" + if not partitioner.is_fitted: + raise ValueError("arg partitioner must be fitted") + + n_partitions = len(partitioner.partitions_) + + for seq, seq_name in [(mean, "mean"), (sem, "sem")]: + if len(seq) != n_partitions: + raise ValueError( + f"length of arg {seq_name} must correspond to " + f"the number of partitions (n={n_partitions})" + ) + + if not (0.0 < confidence_level < 1.0): + raise ValueError( + f"arg confidence_level={confidence_level} is not " + "in the range between 0.0 and 1.0 (exclusive)" + ) + + idx = pd.Index( + partitioner.partitions_, name=UnivariateSimulationResult.IDX_PARTITION + ) + self.partitioner = partitioner + self.mean = pd.Series(mean, index=idx, name=UnivariateSimulationResult.COL_MEAN) + self.sem = pd.Series(sem, index=idx, name=UnivariateSimulationResult.COL_SEM) self.feature_name = feature_name self.output_name = output_name self.output_unit = output_unit self.baseline = baseline self.confidence_level = confidence_level - self.partitioner = partitioner - self.outputs = outputs - def outputs_mean(self) -> pd.Series: - """ - Calculate the means of simulation outcomes for every partition. 
- - :return: a series of means, indexed by the central values of the partitions - for which the simulation was run - """ - return self.outputs.loc["mean"].rename(UnivariateSimulationResult.COL_MEAN) + def _ci_width(self) -> np.ndarray: + # get the width of the confidence interval + return -stats.norm.ppf((1.0 - self.confidence_level) / 2.0) * self.sem.values - def outputs_lower_bound(self) -> pd.Series: + @property + def lower_bound(self) -> pd.Series: """ Calculate the lower CI bounds of the distribution of simulation outcomes, for every partition. @@ -160,19 +186,13 @@ def outputs_lower_bound(self) -> pd.Series: :return: a series of lower CI bounds, indexed by the central values of the partitions for which the simulation was run """ - # return the mean outputs for each partition without aggregating - # further, and determine the lower confidence bound based on the standard - # error of the mean and the desired confidence level - - values = ( - self.outputs.loc["mean"] - + stats.norm.ppf((1.0 - self.confidence_level) / 2.0) - * self.outputs.loc["sem"] - ) - return values.rename(UnivariateSimulationResult.COL_LOWER_BOUND) + return (self.mean - self._ci_width()).rename( + UnivariateSimulationResult.COL_LOWER_BOUND + ) - def outputs_upper_bound(self) -> pd.Series: + @property + def upper_bound(self) -> pd.Series: """ Calculate the lower CI bounds of the distribution of simulation outcomes, for every partition. @@ -180,34 +200,20 @@ def outputs_upper_bound(self) -> pd.Series: :return: a series of upper CI bounds, indexed by the central values of the partitions for which the simulation was run """ - # return the mean outputs for each partition without aggregating - # further, and determine the upper confidence bound based on the standard - # error of the mean and the desired confidence level - values = ( - self.outputs.loc["mean"] - - stats.norm.ppf((1.0 - self.confidence_level) / 2.0) - * self.outputs.loc["sem"] + return (self.mean + self._ci_width()).rename( + UnivariateSimulationResult.COL_UPPER_BOUND ) - return values.rename(UnivariateSimulationResult.COL_UPPER_BOUND) - class BaseUnivariateSimulator( - ParallelizableMixin, Generic[T_LearnerPipelineDF], metaclass=ABCMeta + ParallelizableMixin, Generic[T_LearnerDF], metaclass=ABCMeta ): """ Base class for univariate simulations. """ - #: The name of the column index of attribute :attr:`.output`, denoting partitions - #: represented by their central values or by a category. - IDX_PARTITION = "partition" - - #: The name of a series of simulated outputs. - COL_OUTPUT = "output" - #: The learner pipeline used to conduct simulations - model: T_LearnerPipelineDF + model: T_LearnerDF #: The sample used in baseline calculations and simulations; this is the full sample #: from the :attr:`.crossfit`, or a subsample thereof @@ -219,7 +225,7 @@ class BaseUnivariateSimulator( def __init__( self, - model: T_LearnerPipelineDF, + model: T_LearnerDF, sample: Sample, *, confidence_level: float = 0.95, @@ -241,10 +247,10 @@ def __init__( verbose=verbose, ) - if not isinstance(model, self._expected_pipeline_type()): + if not isinstance(model, self._expected_learner_type()): raise TypeError( "arg crossfit must fit a pipeline of type " - f"{self._expected_pipeline_type().__name__}." + f"{self._expected_learner_type().__name__}." 
) if not model.is_fitted: @@ -283,23 +289,21 @@ def simulate_feature( sample = self.sample + mean, sem = self._simulate_feature_with_values( + feature_name=feature_name, + simulation_values=( + partitioner.fit(sample.features.loc[:, feature_name]).partitions_ + ), + ) return UnivariateSimulationResult( + partitioner=partitioner, + mean=mean, + sem=sem, feature_name=feature_name, output_name=sample.target_name, output_unit=self.output_unit, baseline=self.baseline(), confidence_level=self.confidence_level, - partitioner=partitioner, - outputs=( - self._simulate_feature_with_values( - feature_name=feature_name, - simulation_values=( - partitioner.fit( - sample.features.loc[:, feature_name] - ).partitions_ - ), - ) - ), ) @property @@ -330,19 +334,24 @@ def expected_output(self) -> float: @staticmethod @abstractmethod - def _expected_pipeline_type() -> Type[T_LearnerPipelineDF]: + def _expected_learner_type() -> Type[T_LearnerDF]: pass @staticmethod @abstractmethod - def _simulate(model: T_LearnerPipelineDF, x: pd.DataFrame) -> pd.Series: + def _simulate(model: T_LearnerDF, x: pd.DataFrame) -> Tuple[float, float]: pass + @staticmethod + def _aggregate(predictions: pd.Series) -> Tuple[float, float]: + # generate summary stats for a series of predictions + return predictions.mean(), predictions.sem() + def _simulate_feature_with_values( self, feature_name: str, simulation_values: Sequence[T_Partition], - ) -> pd.DataFrame: + ) -> Tuple[Sequence[float], Sequence[float]]: """ Run a simulation on a feature. @@ -351,79 +360,40 @@ def _simulate_feature_with_values( :param feature_name: name of the feature to use in the simulation :param simulation_values: values to use in the simulation - :return: data frame with splits as rows and partitions as columns. + :return: a tuple with mean predictions and standard errors of mean predictions + for each partition """ if feature_name not in self.sample.features.columns: raise ValueError(f"feature not in sample: {feature_name}") - model = self.model - sample = self.sample - # for each split, calculate the mean simulation outputs and the standard error - # of each mean - mean: Sequence[float] - sem: Sequence[float] - mean, sem = UnivariateUpliftSimulator._simulate_values_for_split( - model=model, - subsample=sample, - feature_name=feature_name, - simulated_values=simulation_values, - simulate_fn=self._simulate, - ) - - index_name: str - index: Optional[List[str]] - simulation_results_per_split: List[List[float]] - - index_name = "metric" - index = ["mean", "sem"] - - return pd.DataFrame( - [mean, sem], columns=simulation_values, index=index - ).rename_axis( - index=index_name, - columns=BaseUnivariateSimulator.IDX_PARTITION, - ) - - @staticmethod - def _simulate_values_for_split( - model: LearnerDF, - subsample: Sample, - feature_name: str, - simulated_values: Optional[Sequence[Any]], - simulate_fn: Callable[[LearnerDF, pd.DataFrame], pd.Series], - ) -> Tuple[Sequence[float], Sequence[float]]: - # for a list of values to be simulated, return a sequence of mean outputs + # for a list of values to be simulated, calculate a sequence of mean predictions # and a sequence of standard errors of those means - - n_observations = len(subsample) - features = subsample.features + features = self.sample.features feature_dtype = features.loc[:, feature_name].dtype - outputs_mean_sem: List[Tuple[float, float]] = [ - (outputs_sr.mean(), outputs_sr.sem()) - for outputs_sr in ( - simulate_fn( - model, - features.assign( - **{ - feature_name: np.full( - shape=n_observations, - 
fill_value=value, - dtype=feature_dtype, - ) - } - ), - ) - for value in simulated_values + outputs_mean_sem: Iterable[Tuple[float, float]] = ( + self._simulate( + self.model, + features.assign( + **{ + feature_name: np.full( + shape=len(features), + fill_value=value, + dtype=feature_dtype, + ) + } + ), ) - ] + for value in simulation_values + ) + outputs_mean, outputs_sem = zip(*outputs_mean_sem) return outputs_mean, outputs_sem @inheritdoc(match="[see superclass]") -class UnivariateProbabilitySimulator(BaseUnivariateSimulator[ClassifierPipelineDF]): +class UnivariateProbabilitySimulator(BaseUnivariateSimulator[ClassifierDF]): """ Univariate simulation of positive class probabilities based on a binary classifier. @@ -469,7 +439,7 @@ def _positive_class(self) -> Any: """ The label of the positive class of the binary classifier being simulated. """ - classifier = self.model.final_estimator + classifier = self.model try: return classifier.classes_[-1] @@ -481,19 +451,19 @@ def _positive_class(self) -> Any: return "positive class" @staticmethod - def _expected_pipeline_type() -> Type[ClassifierPipelineDF]: - return ClassifierPipelineDF + def _expected_learner_type() -> Type[ClassifierDF]: + return ClassifierDF @staticmethod - def _simulate(model: ClassifierPipelineDF, x: pd.DataFrame) -> pd.Series: + def _simulate(model: ClassifierDF, x: pd.DataFrame) -> Tuple[float, float]: probabilities: pd.DataFrame = model.predict_proba(x) if probabilities.shape[1] != 2: raise TypeError("only binary classifiers are supported") - return probabilities.iloc[:, 1] + return BaseUnivariateSimulator._aggregate(probabilities.iloc[:, 1]) class _UnivariateRegressionSimulator( - BaseUnivariateSimulator[RegressorPipelineDF], metaclass=ABCMeta + BaseUnivariateSimulator[RegressorDF], metaclass=ABCMeta ): def expected_output(self) -> float: """ @@ -504,14 +474,14 @@ def expected_output(self) -> float: return self.sample.target.mean() @staticmethod - def _expected_pipeline_type() -> Type[RegressorPipelineDF]: - return RegressorPipelineDF + def _expected_learner_type() -> Type[RegressorDF]: + return RegressorDF @staticmethod - def _simulate(model: RegressorPipelineDF, x: pd.DataFrame) -> pd.Series: + def _simulate(model: RegressorDF, x: pd.DataFrame) -> Tuple[float, float]: predictions = model.predict(X=x) assert predictions.ndim == 1, "single-target regressor required" - return predictions + return BaseUnivariateSimulator._aggregate(predictions) @inheritdoc(match="[see superclass]") @@ -583,18 +553,16 @@ def baseline(self) -> float: return 0.0 def simulate_feature( - self, - feature_name: str, - *, - partitioner: Partitioner[T_Partition], + self, feature_name: str, *, partitioner: Partitioner[T_Partition] ) -> UnivariateSimulationResult: """[see superclass]""" + result = super().simulate_feature( feature_name=feature_name, partitioner=partitioner ) - # we only offset the mean values, but not the standard errors of the means - # (which are relative values already so don't need to be offset) - result.outputs.loc["mean"] -= self.expected_output() + + # offset the mean values to get uplift instead of absolute outputs + result.mean -= self.expected_output() return result diff --git a/src/facet/simulation/viz/_draw.py b/src/facet/simulation/viz/_draw.py index f082f306..4be0351f 100644 --- a/src/facet/simulation/viz/_draw.py +++ b/src/facet/simulation/viz/_draw.py @@ -90,9 +90,9 @@ def _draw(self, data: UnivariateSimulationResult) -> None: Sequence[Any], Sequence[int], ] = ( - data.outputs_mean().to_list(), - 
data.outputs_lower_bound().to_list(), - data.outputs_upper_bound().to_list(), + data.mean.to_list(), + data.lower_bound.to_list(), + data.upper_bound.to_list(), data.partitioner.partitions_, data.partitioner.frequencies_, ) diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py index 083c2dd1..c32acd26 100644 --- a/test/test/facet/test_simulation.py +++ b/test/test/facet/test_simulation.py @@ -82,11 +82,11 @@ def test_univariate_target_simulation( index = pd.Index( data=[0.0, 5.0, 10.0, 15.0, 20.0, 25.0], - name=UnivariateTargetSimulator.IDX_PARTITION, + name=UnivariateSimulationResult.IDX_PARTITION, ) assert_series_equal( - simulation_result.outputs_lower_bound(), + simulation_result.lower_bound, pd.Series( [24.98646, 24.98646, 21.15398, 20.23877, 20.23877, 20.23877], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -95,7 +95,7 @@ def test_univariate_target_simulation( ) assert_series_equal( - simulation_result.outputs_mean(), + simulation_result.mean, pd.Series( [25.4571, 25.4571, 21.67744, 20.81063, 20.81063, 20.81063], name=UnivariateSimulationResult.COL_MEAN, @@ -104,7 +104,7 @@ def test_univariate_target_simulation( ) assert_series_equal( - simulation_result.outputs_upper_bound(), + simulation_result.upper_bound, pd.Series( [25.92774, 25.92774, 22.2009, 21.38249, 21.38249, 21.38249], name=UnivariateSimulationResult.COL_UPPER_BOUND, @@ -143,11 +143,11 @@ def test_univariate_target_subsample_simulation_80( index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], - name=UnivariateTargetSimulator.IDX_PARTITION, + name=UnivariateSimulationResult.IDX_PARTITION, ) assert_series_equal( - simulation_result.outputs_lower_bound(), + simulation_result.lower_bound, pd.Series( [25.05676, 25.05676, 25.05676, 22.96243, 21.43395] + [21.21544, 20.76824, 20.49282, 20.49282], @@ -157,7 +157,7 @@ def test_univariate_target_subsample_simulation_80( ) assert_series_equal( - simulation_result.outputs_mean(), + simulation_result.mean, pd.Series( [25.642227, 25.642227, 25.642227, 23.598706, 22.067057] + [21.864828, 21.451056, 21.195954, 21.195954], @@ -167,7 +167,7 @@ def test_univariate_target_subsample_simulation_80( ) assert_series_equal( - simulation_result.outputs_upper_bound(), + simulation_result.upper_bound, pd.Series( [26.22769, 26.22769, 26.22769, 24.23498, 22.70016] + [22.51422, 22.13387, 21.89909, 21.89909], @@ -207,11 +207,11 @@ def test_univariate_uplift_subsample_simulation_95( index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], - name=UnivariateTargetSimulator.IDX_PARTITION, + name=UnivariateSimulationResult.IDX_PARTITION, ) assert_series_equal( - simulation_result.outputs_lower_bound().round(6), + simulation_result.lower_bound.round(6), pd.Series( [1.800835, 1.800835, 1.800835, -0.320393, -1.847194] + [-2.074327, -2.539217, -2.825394, -2.825394], @@ -221,7 +221,7 @@ def test_univariate_uplift_subsample_simulation_95( ) assert_series_equal( - simulation_result.outputs_mean().round(6), + simulation_result.mean.round(6), pd.Series( [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + [-1.081172, -1.494944, -1.750046, -1.750046], @@ -231,7 +231,7 @@ def test_univariate_uplift_subsample_simulation_95( ) assert_series_equal( - simulation_result.outputs_upper_bound().round(6), + simulation_result.upper_bound.round(6), pd.Series( [3.59162, 3.59162, 3.59162, 1.625805, 0.089307] + [-0.088017, -0.450671, -0.674698, -0.674698], @@ -267,11 +267,11 @@ def test_univariate_uplift_simulation( index = pd.Index( data=[0.0, 5.0, 10.0, 
15.0, 20.0, 25.0], - name=UnivariateUpliftSimulator.IDX_PARTITION, + name=UnivariateSimulationResult.IDX_PARTITION, ) assert_series_equal( - simulation_result.outputs_lower_bound(), + simulation_result.lower_bound, pd.Series( [2.677461, 2.677461, -1.155017, -2.070234, -2.070234, -2.070234], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -280,7 +280,7 @@ def test_univariate_uplift_simulation( ) assert_series_equal( - simulation_result.outputs_mean(), + simulation_result.mean, pd.Series( [3.148100, 3.148100, -0.631560, -1.498371, -1.498371, -1.498371], name=UnivariateSimulationResult.COL_MEAN, @@ -289,7 +289,7 @@ def test_univariate_uplift_simulation( ) assert_series_equal( - simulation_result.outputs_upper_bound(), + simulation_result.upper_bound, pd.Series( [3.618739, 3.618739, -0.108103, -0.926508, -0.926508, -0.926508], name=UnivariateSimulationResult.COL_UPPER_BOUND, @@ -327,11 +327,11 @@ def test_univariate_uplift_subsample_simulation( index = pd.Index( data=[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0], - name=UnivariateUpliftSimulator.IDX_PARTITION, + name=UnivariateSimulationResult.IDX_PARTITION, ) assert_series_equal( - simulation_result.outputs_lower_bound(), + simulation_result.lower_bound, pd.Series( [2.110762, 2.110762, 2.110762, 0.0164306, -1.512048] + [-1.730561, -2.177757, -2.453179, -2.453179], @@ -341,7 +341,7 @@ def test_univariate_uplift_subsample_simulation( ) assert_series_equal( - simulation_result.outputs_mean(), + simulation_result.mean, pd.Series( [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + [-1.081172, -1.494944, -1.750046, -1.750046], @@ -351,7 +351,7 @@ def test_univariate_uplift_subsample_simulation( ) assert_series_equal( - simulation_result.outputs_upper_bound(), + simulation_result.upper_bound, pd.Series( [3.281693, 3.281693, 3.281693, 1.288981, -0.245838] + [-0.431783, -0.81213, -1.046914, -1.046914], @@ -390,13 +390,13 @@ def test_univariate_probability_simulation( ) index = pd.Index( - data=[5, 5.5, 6, 6.5, 7, 7.5, 8], name=UnivariateUpliftSimulator.IDX_PARTITION + data=[5, 5.5, 6, 6.5, 7, 7.5, 8], name=UnivariateSimulationResult.IDX_PARTITION ) assert simulation_result.baseline == approx(0.5) assert_series_equal( - simulation_result.outputs_lower_bound(), + simulation_result.lower_bound, pd.Series( [0.415337, 0.390766, 0.401039, 0.420727, 0.425914, 0.452885, 0.452885], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -405,7 +405,7 @@ def test_univariate_probability_simulation( ) assert_series_equal( - simulation_result.outputs_mean(), + simulation_result.mean, pd.Series( [0.495814, 0.475288, 0.48689, 0.507294, 0.510055, 0.533888, 0.533888], name=UnivariateSimulationResult.COL_MEAN, @@ -414,7 +414,7 @@ def test_univariate_probability_simulation( ) assert_series_equal( - simulation_result.outputs_upper_bound(), + simulation_result.upper_bound, pd.Series( [0.576292, 0.559809, 0.57274, 0.593862, 0.594196, 0.614892, 0.614892], name=UnivariateSimulationResult.COL_UPPER_BOUND, From 7e224e5faec9062a8fb8d69a675f49abc334f17d Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 09:43:16 +0200 Subject: [PATCH 035/106] API: simulate partitions in parallel --- src/facet/simulation/_simulation.py | 68 ++++++++++++++++++----------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py index cecd908f..3778c095 100644 --- a/src/facet/simulation/_simulation.py +++ b/src/facet/simulation/_simulation.py @@ -21,7 +21,7 @@ from scipy import stats from 
pytools.api import AllTracker, inheritdoc -from pytools.parallelization import ParallelizableMixin +from pytools.parallelization import Job, JobRunner, ParallelizableMixin from sklearndf import ClassifierDF, LearnerDF, RegressorDF from ..data import Sample @@ -339,11 +339,27 @@ def _expected_learner_type() -> Type[T_LearnerDF]: @staticmethod @abstractmethod - def _simulate(model: T_LearnerDF, x: pd.DataFrame) -> Tuple[float, float]: + def _simulate( + model: T_LearnerDF, x: pd.DataFrame, name: str, value: Any + ) -> Tuple[float, float]: pass @staticmethod - def _aggregate(predictions: pd.Series) -> Tuple[float, float]: + def _set_constant_feature_value( + x: pd.DataFrame, feature_name: str, value: Any + ) -> pd.DataFrame: + return x.assign( + **{ + feature_name: np.full( + shape=len(x), + fill_value=value, + dtype=x.loc[:, feature_name].dtype, + ) + } + ) + + @staticmethod + def _aggregate_simulation_results(predictions: pd.Series) -> Tuple[float, float]: # generate summary stats for a series of predictions return predictions.mean(), predictions.sem() @@ -355,8 +371,8 @@ def _simulate_feature_with_values( """ Run a simulation on a feature. - For each combination of crossfit and feature value, compute the simulation - result when substituting a given fixed value for the feature being simulated. + For each simulation value, compute the mean and sem of predictions when + substituting the value for the feature being simulated. :param feature_name: name of the feature to use in the simulation :param simulation_values: values to use in the simulation @@ -370,21 +386,11 @@ def _simulate_feature_with_values( # for a list of values to be simulated, calculate a sequence of mean predictions # and a sequence of standard errors of those means features = self.sample.features - feature_dtype = features.loc[:, feature_name].dtype - - outputs_mean_sem: Iterable[Tuple[float, float]] = ( - self._simulate( - self.model, - features.assign( - **{ - feature_name: np.full( - shape=len(features), - fill_value=value, - dtype=feature_dtype, - ) - } - ), - ) + + outputs_mean_sem: Iterable[Tuple[float, float]] = JobRunner.from_parallelizable( + self + ).run_jobs( + Job.delayed(self._simulate)(self.model, features, feature_name, value) for value in simulation_values ) @@ -455,11 +461,17 @@ def _expected_learner_type() -> Type[ClassifierDF]: return ClassifierDF @staticmethod - def _simulate(model: ClassifierDF, x: pd.DataFrame) -> Tuple[float, float]: - probabilities: pd.DataFrame = model.predict_proba(x) + def _simulate( + model: ClassifierDF, x: pd.DataFrame, name: str, value: Any + ) -> Tuple[float, float]: + probabilities: pd.DataFrame = model.predict_proba( + BaseUnivariateSimulator._set_constant_feature_value(x, name, value) + ) if probabilities.shape[1] != 2: raise TypeError("only binary classifiers are supported") - return BaseUnivariateSimulator._aggregate(probabilities.iloc[:, 1]) + return BaseUnivariateSimulator._aggregate_simulation_results( + probabilities.iloc[:, 1] + ) class _UnivariateRegressionSimulator( @@ -478,10 +490,14 @@ def _expected_learner_type() -> Type[RegressorDF]: return RegressorDF @staticmethod - def _simulate(model: RegressorDF, x: pd.DataFrame) -> Tuple[float, float]: - predictions = model.predict(X=x) + def _simulate( + model: RegressorDF, x: pd.DataFrame, name: str, value: Any + ) -> Tuple[float, float]: + predictions = model.predict( + X=BaseUnivariateSimulator._set_constant_feature_value(x, name, value) + ) assert predictions.ndim == 1, "single-target regressor required" - return 
BaseUnivariateSimulator._aggregate(predictions)
+        return BaseUnivariateSimulator._aggregate_simulation_results(predictions)
 
 
 @inheritdoc(match="[see superclass]")

From 4aa64b9c63519648314391599524b18cc3b666a5 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Tue, 19 Oct 2021 10:07:12 +0200
Subject: [PATCH 036/106] DOC: update simulation docstrings

---
 src/facet/simulation/_simulation.py | 39 +++++++++++++++--------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py
index 3778c095..15137d3a 100644
--- a/src/facet/simulation/_simulation.py
+++ b/src/facet/simulation/_simulation.py
@@ -215,8 +215,7 @@ class BaseUnivariateSimulator(
     #: The learner pipeline used to conduct simulations
     model: T_LearnerDF
 
-    #: The sample used in baseline calculations and simulations; this is the full sample
-    #: from the :attr:`.crossfit`, or a subsample thereof
+    #: The sample to be used in baseline calculations and simulations
     sample: Sample
 
     #: The width of the confidence interval used to calculate the lower/upper bound
@@ -249,7 +248,7 @@ def __init__(
 
         if not isinstance(model, self._expected_learner_type()):
             raise TypeError(
-                "arg crossfit must fit a pipeline of type "
+                "arg model must be a learner of type "
                 f"{self._expected_learner_type().__name__}."
 
@@ -411,11 +410,11 @@ class UnivariateProbabilitySimulator(BaseUnivariateSimulator[ClassifierDF]):
     observations is modified by assigning value `v[j]` for feature `x[i]` for all
     observations, i.e., assuming that feature `x[i]` has the constant value `v[j]`.
 
-    Then all classifiers of a :class:`.LearnerCrossfit` are used in turn to each predict
-    the positive class probabilities for all observations, and the mean probability
-    across all observations is calculated for each classifier and value `v[j]`.
-    The simulation result is a set of `n` distributions of mean predicted probabilities
-    across all classifiers -- one distribution for each `v[j]`.
+    Then the classifier is used to predict the positive class probabilities for all
+    observations, and the mean probability across all observations is calculated
+    for each value `v[j]`,
+    along with the standard error of the mean as a basis for obtaining confidence
+    intervals.
 
     Note that sample weights are not taken into account for simulations; each
     observation has the same weight in the simulation even if different weights
@@ -513,11 +512,11 @@ class UnivariateTargetSimulator(_UnivariateRegressionSimulator):
     observations is modified by assigning value `v[j]` for feature `x[i]` for all
     observations, i.e., assuming that feature `x[i]` has the constant value `v[j]`.
 
-    Then all regressors of a :class:`.LearnerCrossfit` are used in turn to each predict
-    the output for all observations, and the mean of the predicted outputs is calculated
-    for each regressor and value `v[j]`. The simulation result is a set of `n`
-    distributions of mean predicted targets across regressors -- one distribution for
-    each `v[j]`.
+    Then the regressor is used to predict the output for all
+    observations, and the mean output across all observations is calculated
+    for each value `v[j]`,
+    along with the standard error of the mean as a basis for obtaining confidence
+    intervals.
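+    The confidence interval for each value is derived from the standard error
+    of the mean as `mean ± z * sem`, where `z` is the standard normal quantile
+    at `(1 + confidence_level) / 2`; see
+    :attr:`.UnivariateSimulationResult.lower_bound` and
+    :attr:`.UnivariateSimulationResult.upper_bound`.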
 
     Note that sample weights are not taken into account for simulations; each
     observation has the same weight in the simulation even if different weights
@@ -543,12 +542,14 @@ class UnivariateUpliftSimulator(_UnivariateRegressionSimulator):
     observations is modified by assigning value `v[j]` for feature `x[i]` for all
     observations, i.e., assuming that feature `x[i]` has the constant value `v[j]`.
 
-    Then all regressors of a :class:`.LearnerCrossfit` are used in turn to each predict
-    the output for all observations, and the mean of the predicted outputs is calculated
-    for each regressor and value `v[j]`. The simulation result is a set of `n`
-    distributions of mean predicted target uplifts across regressors, i.e. the mean
-    predicted difference of the historical expectation value of the target --
-    one distribution for each `v[j]`.
+    Then the regressor is used to predict the output for all
+    observations, and the mean output across all observations is calculated
+    for each value `v[j]`,
+    along with the standard error of the mean as a basis for obtaining confidence
+    intervals.
+    The simulation result is determined as the mean *uplift*, i.e., the mean
+    predicted difference from the historical expectation value of the target,
+    for each `v[j]`.
 
     Note that sample weights are not taken into account for simulations; each
     observation has the same weight in the simulation even if different weights

From ae0e1f2dd70a31760fcd088f0a7201dea230f2ec Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Tue, 19 Oct 2021 10:43:59 +0200
Subject: [PATCH 037/106] FIX: remove references to removed attribute
 LearnerInspector.crossfit

---
 src/facet/inspection/_inspection.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py
index 02d956f4..9145e9c9 100644
--- a/src/facet/inspection/_inspection.py
+++ b/src/facet/inspection/_inspection.py
@@ -336,7 +336,7 @@ def features_(self) -> List[str]:
         The names of the features used to fit the learner pipeline explained by this
         inspector.
         """
-        return self.crossfit_.pipeline.feature_names_out_.to_list()
+        return self.pipeline.feature_names_out_.to_list()
 
     def shap_values(self) -> Union[pd.DataFrame, List[pd.DataFrame]]:
         """
@@ -828,9 +828,7 @@ def __arrays_to_matrix(
         # transform a matrix of shape (n_outputs, n_features, n_features)
         # to a data frame
 
-        feature_index = self.crossfit_.pipeline.feature_names_out_.rename(
-            Sample.IDX_FEATURE
-        )
+        feature_index = self.pipeline.feature_names_out_.rename(Sample.IDX_FEATURE)
 
         n_features = len(feature_index)
         assert matrix.shape == (len(self.output_names_), n_features, n_features)

From 202a9a8dd6573ee85237b35a3a7259f5a8cf6019 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Tue, 19 Oct 2021 10:45:37 +0200
Subject: [PATCH 038/106] =?UTF-8?q?FIX:=20remove=20obsolete=20arg=20`aggre?=
 =?UTF-8?q?gation`=20from=20feature=5F=E2=80=A6=5Fmatrix=20methods?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/facet/inspection/_inspection.py | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py
index 9145e9c9..e9ce00a2 100644
--- a/src/facet/inspection/_inspection.py
+++ b/src/facet/inspection/_inspection.py
@@ -138,14 +138,6 @@ class LearnerInspector(
     specified in the underlying training sample.
""" - #: constant for "mean" aggregation method, to be passed as arg ``aggregation`` - #: to :class:`.LearnerInspector` methods that implement it - AGG_MEAN = "mean" - - #: constant for "std" aggregation method, to be passed as arg ``aggregation`` - #: to :class:`.LearnerInspector` methods that implement it - AGG_STD = "std" - #: Name for feature importance series or column. COL_IMPORTANCE = "importance" @@ -426,7 +418,6 @@ def feature_synergy_matrix( *, absolute: bool = False, symmetrical: bool = False, - aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, ) -> Union[Matrix, List[Matrix]]: """ @@ -454,8 +445,6 @@ def feature_synergy_matrix( mutual synergy; if ``False``, return an asymmetrical matrix quantifying unilateral synergy of the features represented by rows with the features represented by columns (default: ``False``) - :param aggregation: if ``mean``, return mean values across all models in the - crossfit; additional aggregation methods will be added in future releases :param clustered: if ``True``, reorder the rows and columns of the matrix such that synergy between adjacent rows and columns is maximised; if ``False``, keep rows and columns in the original features order @@ -465,8 +454,6 @@ def feature_synergy_matrix( """ self._ensure_fitted() - self.__validate_aggregation_method(aggregation) - explainer = self.__interaction_explainer return self.__feature_affinity_matrix( affinity_matrices=( @@ -486,7 +473,6 @@ def feature_redundancy_matrix( *, absolute: bool = False, symmetrical: bool = False, - aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, ) -> Union[Matrix, List[Matrix]]: """ @@ -514,8 +500,6 @@ def feature_redundancy_matrix( mutual redundancy; if ``False``, return an asymmetrical matrix quantifying unilateral redundancy of the features represented by rows with the features represented by columns (default: ``False``) - :param aggregation: if ``mean``, return mean values across all models in the - crossfit; additional aggregation methods will be added in future releases :param clustered: if ``True``, reorder the rows and columns of the matrix such that redundancy between adjacent rows and columns is maximised; if ``False``, keep rows and columns in the original features order @@ -525,8 +509,6 @@ def feature_redundancy_matrix( """ self._ensure_fitted() - self.__validate_aggregation_method(aggregation) - explainer = self.__interaction_explainer return self.__feature_affinity_matrix( affinity_matrices=( @@ -546,7 +528,6 @@ def feature_association_matrix( *, absolute: bool = False, symmetrical: bool = False, - aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, ) -> Union[Matrix, List[Matrix]]: """ @@ -576,8 +557,6 @@ def feature_association_matrix( with the features represented by columns; if ``True``, return a symmetrical matrix quantifying mutual association (default: ``False``) - :param aggregation: if ``mean``, return mean values across all models in the - crossfit; additional aggregation methods will be added in future releases :param clustered: if ``True``, reorder the rows and columns of the matrix such that association between adjacent rows and columns is maximised; if ``False``, keep rows and columns in the original features order @@ -587,8 +566,6 @@ def feature_association_matrix( """ self._ensure_fitted() - self.__validate_aggregation_method(aggregation) - global_explainer = self._shap_global_explainer return self.__feature_affinity_matrix( affinity_matrices=( @@ -1066,11 +1043,6 @@ def __frame_to_matrix( name_labels=("primary 
feature", "associated feature"), ) - @staticmethod - def __validate_aggregation_method(aggregation: str) -> None: - if aggregation != LearnerInspector.AGG_MEAN: - raise ValueError(f"unknown aggregation method: aggregation={aggregation}") - @property def __shap_interaction_values_calculator(self) -> ShapInteractionValuesCalculator: self._ensure_shap_interaction() From 25f7cd85191c0178ce623f7aea3840ad492bb3f3 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 10:46:16 +0200 Subject: [PATCH 039/106] DOC: remove references to crossfit from LearnerInspector documentation --- src/facet/inspection/_inspection.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index e9ce00a2..63a4c027 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -109,28 +109,27 @@ class LearnerInspector( """ Explain regressors and classifiers based on SHAP values. - Focus is on explaining the overall model as well as individual observations. - Given that SHAP values are estimations, this inspector operates based on crossfits - to enable estimations of the uncertainty of SHAP values. + Focus is on explaining the overall model, but the inspector also delivers + SHAP explanations of the individual observations. Available inspection methods are: - - SHAP values (mean or standard deviation across crossfits) - - SHAP interaction values (mean or standard deviation across crossfits) + - SHAP values + - SHAP interaction values - feature importance derived from SHAP values (either as mean absolute values or as the root of mean squares) - - pairwise feature interaction matrix (direct feature interaction quantified by - SHAP interaction values) - pairwise feature redundancy matrix (requires availability of SHAP interaction values) - pairwise feature synergy matrix (requires availability of SHAP interaction values) - pairwise feature association matrix (upper bound for redundancy but can be inflated by synergy; available if SHAP interaction values are unknown) + - pairwise feature interaction matrix (direct feature interaction quantified by + SHAP interaction values) - feature redundancy linkage (to visualize clusters of redundant features in a - dendrogram) + dendrogram; requires availability of SHAP interaction values) - feature synergy linkage (to visualize clusters of synergistic features in a - dendrogram) + dendrogram; requires availability of SHAP interaction values) - feature association linkage (to visualize clusters of associated features in a dendrogram) @@ -216,7 +215,7 @@ def __init__( def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: """ - Fit the inspector with the given crossfit. + Fit the inspector with the given sample. This will calculate SHAP values and, if enabled in the underlying SHAP explainer, also SHAP interaction values. 
@@ -235,9 +234,8 @@ def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: _is_classifier = is_classifier(learner) if _is_classifier and isinstance(sample.target_name, list): raise ValueError( - "only single-output classifiers (binary or multi-class) are " - "supported, but the classifier in the given crossfit has been " - "fitted on multiple columns " + "only single-output classifiers (binary or multi-class) are supported, " + "but the given classifier has been fitted on multiple columns " f"{sample.target_name}" ) From 8abb7aef0a88b3d880107936cc4334954dc07a2c Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 11:25:36 +0200 Subject: [PATCH 040/106] TEST: remove LearnerCrossfit from learner inspector unit tests --- test/test/conftest.py | 86 ++++++++++----------- test/test/facet/test_crossfit.py | 9 ++- test/test/facet/test_inspection.py | 90 ++++++++++------------ test/test/facet/test_shap_decomposition.py | 19 ++--- 4 files changed, 95 insertions(+), 109 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index caa9b53f..c1571b52 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -1,6 +1,4 @@ -import functools import logging -import operator from typing import Any, List, Mapping, Optional, Sequence, Set import numpy as np @@ -160,12 +158,9 @@ def regressor_ranker( @pytest.fixture -def best_lgbm_crossfit( +def best_lgbm_model( regressor_ranker: LearnerRanker[RegressorPipelineDF], - cv_kfold: KFold, - sample: Sample, - n_jobs: int, -) -> LearnerCrossfit[RegressorPipelineDF]: +) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test # performance best_lgbm_evaluation: LearnerEvaluation[RegressorPipelineDF] = [ @@ -174,38 +169,28 @@ def best_lgbm_crossfit( if isinstance(evaluation.pipeline.regressor, LGBMRegressorDF) ][0] - best_lgbm_regressor: RegressorPipelineDF = best_lgbm_evaluation.pipeline - - return LearnerCrossfit( - pipeline=best_lgbm_regressor, - cv=cv_kfold, - random_state=42, - n_jobs=n_jobs, - ).fit(sample=sample) + return best_lgbm_evaluation.pipeline @pytest.fixture -def feature_names(best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF]) -> Set[str]: +def preprocessed_feature_names(best_lgbm_model: RegressorPipelineDF) -> Set[str]: """ - all unique features across the models in the crossfit, after preprocessing + Names of all features after preprocessing """ - return functools.reduce( - operator.or_, - (set(model.feature_names_out_) for model in best_lgbm_crossfit.models()), - ) + return set(best_lgbm_model.feature_names_out_) @pytest.fixture def regressor_inspector( - best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], n_jobs: int + best_lgbm_model: RegressorPipelineDF, sample: Sample, n_jobs: int ) -> LearnerInspector: inspector = LearnerInspector( - pipeline=best_lgbm_crossfit.pipeline, + pipeline=best_lgbm_model, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, - ).fit(sample=best_lgbm_crossfit.sample_) + ).fit(sample=sample) return inspector @@ -272,7 +257,7 @@ def iris_df(iris_target_name: str) -> pd.DataFrame: @pytest.fixture -def iris_sample(iris_df: pd.DataFrame, iris_target_name: str) -> Sample: +def iris_sample_multi_class(iris_df: pd.DataFrame, iris_target_name: str) -> Sample: # the iris dataset return Sample( observations=iris_df.assign(weight=2.0), @@ -282,10 +267,10 @@ def iris_sample(iris_df: pd.DataFrame, iris_target_name: str) -> Sample: @pytest.fixture -def 
iris_sample_binary(iris_sample: Sample) -> Sample: +def iris_sample_binary(iris_sample_multi_class) -> Sample: # the iris dataset, retaining only two categories so we can do binary classification - return iris_sample.subsample( - loc=iris_sample.target.isin(["virginica", "versicolor"]) + return iris_sample_multi_class.subsample( + loc=iris_sample_multi_class.target.isin(["virginica", "versicolor"]) ) @@ -355,17 +340,19 @@ def iris_classifier_ranker_binary( cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, ) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: - return fit_learner_ranker( + return fit_classifier_ranker( sample=iris_sample_binary, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @pytest.fixture def iris_classifier_ranker_multi_class( - iris_sample: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int + iris_sample_multi_class: Sample, + cv_stratified_bootstrap: StratifiedBootstrapCV, + n_jobs: int, ) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: - return fit_learner_ranker( - sample=iris_sample, cv=cv_stratified_bootstrap, n_jobs=n_jobs + return fit_classifier_ranker( + sample=iris_sample_multi_class, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -373,11 +360,18 @@ def iris_classifier_ranker_multi_class( def iris_classifier_ranker_dual_target( iris_sample_binary_dual_target: Sample, cv_bootstrap: BootstrapCV, n_jobs: int ) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: - return fit_learner_ranker( + return fit_classifier_ranker( sample=iris_sample_binary_dual_target, cv=cv_bootstrap, n_jobs=n_jobs ) +@pytest.fixture +def iris_classifier_binary( + iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], +) -> ClassifierPipelineDF[RandomForestClassifierDF]: + return iris_classifier_ranker_binary.best_model_ + + @pytest.fixture def iris_classifier_crossfit_binary( iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], @@ -386,24 +380,21 @@ def iris_classifier_crossfit_binary( @pytest.fixture -def iris_classifier_crossfit_multi_class( +def iris_classifier_multi_class( iris_classifier_ranker_multi_class: LearnerRanker[ClassifierPipelineDF], -) -> LearnerCrossfit[ClassifierPipelineDF[RandomForestClassifierDF]]: - return iris_classifier_ranker_multi_class.best_model_crossfit_ +) -> ClassifierPipelineDF[RandomForestClassifierDF]: + return iris_classifier_ranker_multi_class.best_model_ @pytest.fixture def iris_inspector_multi_class( - iris_classifier_crossfit_multi_class: LearnerCrossfit[ - ClassifierPipelineDF[RandomForestClassifierDF] - ], + iris_classifier_multi_class: ClassifierPipelineDF[RandomForestClassifierDF], + iris_sample_multi_class: Sample, n_jobs: int, ) -> LearnerInspector[ClassifierPipelineDF[RandomForestClassifierDF]]: return LearnerInspector( - pipeline=iris_classifier_crossfit_multi_class.pipeline, - shap_interaction=True, - n_jobs=n_jobs, - ).fit(sample=iris_classifier_crossfit_multi_class.sample_) + pipeline=iris_classifier_multi_class, shap_interaction=True, n_jobs=n_jobs + ).fit(sample=iris_sample_multi_class) # @@ -411,10 +402,10 @@ def iris_inspector_multi_class( # -def fit_learner_ranker( +def fit_classifier_ranker( sample: Sample, cv: BaseCrossValidator, n_jobs: int ) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: - # define parameters and crossfit + # define the parameter grid grids = [ LearnerGrid( pipeline=ClassifierPipelineDF( @@ -423,8 +414,9 @@ def fit_learner_ranker( learner_parameters={"n_estimators": [10, 50], "min_samples_leaf": [4, 
8]}, ) ] - # pipeline inspector does only support binary classification - hence - # filter the test_sample down to only 2 target classes: + + # pipeline inspector only supports binary classification, + # therefore filter the sample down to only 2 target classes return LearnerRanker( grids=grids, cv=cv, diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index b46d5f88..e5abf5ec 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -8,7 +8,6 @@ from sklearndf.regression import RandomForestRegressorDF from ..conftest import check_ranking -from facet.data import Sample from facet.selection import LearnerGrid, LearnerRanker from facet.validation import StratifiedBootstrapCV @@ -16,7 +15,7 @@ def test_prediction_classifier( - iris_sample: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int + iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int ) -> None: expected_learner_scores = [0.889, 0.886, 0.885, 0.879] @@ -58,12 +57,14 @@ def test_prediction_classifier( random_state=42, ) - model_ranker.fit(sample=iris_sample) + model_ranker.fit(sample=iris_sample_multi_class) with pytest.raises( ValueError, match="do not use arg sample_weight to pass sample weights" ): - model_ranker.fit(sample=iris_sample, sample_weight=iris_sample.weight) + model_ranker.fit( + sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight + ) log.debug(f"\n{model_ranker.summary_report()}") diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 1fe0d71a..7ccea522 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -3,7 +3,7 @@ """ import logging import warnings -from typing import List, Optional, Sequence, Set, TypeVar, Union +from typing import List, Optional, Sequence, TypeVar, Union import numpy as np import pandas as pd @@ -22,7 +22,6 @@ from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from ..conftest import check_ranking -from facet.crossfit import LearnerCrossfit from facet.data import Sample from facet.inspection import ( KernelExplainerFactory, @@ -30,7 +29,6 @@ TreeExplainerFactory, ) from facet.selection import LearnerGrid, LearnerRanker -from facet.validation import BootstrapCV # noinspection PyMissingOrEmptyDocstring @@ -42,8 +40,8 @@ def test_model_inspection( regressor_grids: Sequence[LearnerGrid[RegressorPipelineDF]], regressor_ranker: LearnerRanker[RegressorPipelineDF], - best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], - feature_names: Set[str], + best_lgbm_model: RegressorPipelineDF, + preprocessed_feature_names, regressor_inspector: LearnerInspector, cv_kfold: KFold, sample: Sample, @@ -74,7 +72,7 @@ def test_model_inspection( assert shap_values.columns.names == [Sample.IDX_FEATURE] # column index - assert set(shap_values.columns) == feature_names + assert set(shap_values.columns) == preprocessed_feature_names # check that the SHAP values add up to the predictions shap_totals = shap_values.sum(axis=1) @@ -83,18 +81,16 @@ def test_model_inspection( # for every observation. 
This is always the same constant value, # therefore the mean absolute deviation is zero - shap_minus_pred = shap_totals - best_lgbm_crossfit.pipeline.predict( - X=sample.features - ) + shap_minus_pred = shap_totals - best_lgbm_model.predict(X=sample.features) assert round(shap_minus_pred.mad(), 12) == 0.0, "predictions matching total SHAP" # test the ModelInspector with a KernelExplainer: inspector_2 = LearnerInspector( - pipeline=best_lgbm_crossfit.pipeline, + pipeline=best_lgbm_model, explainer_factory=KernelExplainerFactory(link="identity", data_size_limit=20), n_jobs=n_jobs, - ).fit(sample=best_lgbm_crossfit.sample_) + ).fit(sample=sample) inspector_2.shap_values() linkage_tree = inspector_2.feature_association_linkage() @@ -121,21 +117,23 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: # noinspection DuplicatedCode def test_model_inspection_classifier_binary( - iris_sample_binary: Sample, iris_classifier_crossfit_binary, n_jobs: int + iris_classifier_binary: ClassifierPipelineDF, + iris_sample_binary: Sample, + n_jobs: int, ) -> None: model_inspector = LearnerInspector( - pipeline=iris_classifier_crossfit_binary.pipeline, + pipeline=iris_classifier_binary, shap_interaction=False, n_jobs=n_jobs, - ).fit(sample=iris_classifier_crossfit_binary.sample_) + ).fit(sample=iris_sample_binary) # calculate the shap value matrix, without any consolidation shap_values = model_inspector.shap_values() # do the shap values add up to predictions minus a constant value? _validate_shap_values_against_predictions( - shap_values=shap_values, crossfit=iris_classifier_crossfit_binary + shap_values=shap_values, model=iris_classifier_binary, sample=iris_sample_binary ) # the length of rows in shap_values should be equal to the unique observation @@ -185,34 +183,29 @@ def test_model_inspection_classifier_binary_single_shap_output() -> None: # create sample object sample_df = Sample(observations=sim_df, target_name="target") - # fit the crossfit - crossfit = LearnerCrossfit( - pipeline=ClassifierPipelineDF( - classifier=GradientBoostingClassifierDF(random_state=42) - ), - cv=BootstrapCV(n_splits=5, random_state=42), - random_state=42, - n_jobs=-3, - ).fit(sample_df) + # fit the model + pipeline = ClassifierPipelineDF( + classifier=GradientBoostingClassifierDF(random_state=42) + ).fit(sample_df.features, sample_df.target) # fit the inspector - LearnerInspector(pipeline=crossfit.pipeline, n_jobs=-3).fit(sample=crossfit.sample_) + LearnerInspector(pipeline=pipeline, n_jobs=-3).fit(sample=sample_df) # noinspection DuplicatedCode def test_model_inspection_classifier_multi_class( - iris_sample: Sample, - iris_classifier_crossfit_multi_class: LearnerCrossfit[ClassifierPipelineDF], iris_inspector_multi_class: LearnerInspector[ClassifierPipelineDF], n_jobs: int, ) -> None: + iris_classifier = iris_inspector_multi_class.pipeline + iris_sample = iris_inspector_multi_class.sample_ # calculate the shap value matrix, without any consolidation shap_values = iris_inspector_multi_class.shap_values() # do the shap values add up to predictions minus a constant value? 
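+    # (a sketch of the invariant checked below: for each output class, subtracting
+    # the row sums of that class's SHAP values from the predicted class
+    # probabilities should leave the same constant, the model's expected value,
+    # for every observation)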
_validate_shap_values_against_predictions( - shap_values=shap_values, crossfit=iris_classifier_crossfit_multi_class + shap_values=shap_values, model=iris_classifier, sample=iris_sample ) # Feature importance @@ -316,14 +309,12 @@ def test_model_inspection_classifier_multi_class( def _validate_shap_values_against_predictions( - shap_values: pd.DataFrame, crossfit: LearnerCrossfit[ClassifierPipelineDF] + shap_values: pd.DataFrame, model: ClassifierPipelineDF, sample: Sample ): # calculate the matching predictions, so we can check if the SHAP values add up # correctly - predicted_probabilities: pd.DataFrame = crossfit.pipeline.predict_proba( - crossfit.sample_.features - ) + predicted_probabilities: pd.DataFrame = model.predict_proba(sample.features) assert isinstance( predicted_probabilities, pd.DataFrame @@ -378,30 +369,28 @@ def _check_probabilities( # noinspection DuplicatedCode def test_model_inspection_classifier_interaction( + iris_classifier_binary: ClassifierPipelineDF[RandomForestClassifierDF], iris_sample_binary: Sample, - iris_classifier_crossfit_binary: LearnerCrossfit[ - ClassifierPipelineDF[RandomForestClassifierDF] - ], n_jobs: int, ) -> None: warnings.filterwarnings("ignore", message="You are accessing a training score") model_inspector = LearnerInspector( - pipeline=iris_classifier_crossfit_binary.pipeline, + pipeline=iris_classifier_binary, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, - ).fit(sample=iris_classifier_crossfit_binary.sample_) + ).fit(sample=iris_sample_binary) model_inspector_no_interaction = LearnerInspector( - pipeline=iris_classifier_crossfit_binary.pipeline, + pipeline=iris_classifier_binary, shap_interaction=False, explainer_factory=TreeExplainerFactory( feature_perturbation="tree_path_dependent", uses_background_dataset=True ), n_jobs=n_jobs, - ).fit(sample=iris_classifier_crossfit_binary.sample_) + ).fit(sample=iris_sample_binary) # calculate shap interaction values shap_interaction_values = model_inspector.shap_interaction_values() @@ -434,7 +423,8 @@ def test_model_inspection_classifier_interaction( # do the shap values add up to predictions minus a constant value? 
_validate_shap_values_against_predictions( shap_values=model_inspector.shap_interaction_values().groupby(level=0).sum(), - crossfit=iris_classifier_crossfit_binary, + model=iris_classifier_binary, + sample=iris_sample_binary, ) assert model_inspector.feature_importance().values == pytest.approx( @@ -630,9 +620,7 @@ def test_model_inspection_classifier_interaction_dual_target( iris_target_name, n_jobs: int, ) -> None: - iris_classifier_crossfit_dual_target = ( - iris_classifier_ranker_dual_target.best_model_crossfit_ - ) + iris_classifier_dual_target = iris_classifier_ranker_dual_target.best_model_ with pytest.raises( ValueError, @@ -641,13 +629,13 @@ def test_model_inspection_classifier_interaction_dual_target( f"{iris_target_name}.*{iris_target_name}2" ), ): - LearnerInspector( - pipeline=iris_classifier_crossfit_dual_target.pipeline, n_jobs=n_jobs - ).fit(sample=iris_classifier_crossfit_dual_target.sample_) + LearnerInspector(pipeline=iris_classifier_dual_target, n_jobs=n_jobs).fit( + sample=iris_sample_binary_dual_target + ) def test_shap_plot_data( - iris_sample, + iris_sample_multi_class, iris_inspector_multi_class: LearnerInspector[ClassifierPipelineDF], ) -> None: shap_plot_data = iris_inspector_multi_class.shap_plot_data() @@ -666,8 +654,12 @@ def test_shap_plot_data( assert all(shap.shape == features_shape for shap in shap_values) shap_index = shap_plot_data.features.index - assert_frame_equal(shap_plot_data.features, iris_sample.features.loc[shap_index]) - assert_series_equal(shap_plot_data.target, iris_sample.target.loc[shap_index]) + assert_frame_equal( + shap_plot_data.features, iris_sample_multi_class.features.loc[shap_index] + ) + assert_series_equal( + shap_plot_data.target, iris_sample_multi_class.target.loc[shap_index] + ) # diff --git a/test/test/facet/test_shap_decomposition.py b/test/test/facet/test_shap_decomposition.py index 5ec3fe89..cc5e826a 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -6,20 +6,15 @@ import numpy as np -from sklearndf.pipeline import RegressorPipelineDF - -from facet.crossfit import LearnerCrossfit from facet.inspection import LearnerInspector log = logging.getLogger(__name__) -def test_shap_decomposition_matrices( - best_lgbm_crossfit: LearnerCrossfit[RegressorPipelineDF], - feature_names: Set[str], - regressor_inspector: LearnerInspector, +def test_feature_affinity_matrices( + preprocessed_feature_names: Set[str], regressor_inspector: LearnerInspector ) -> None: - # Shap decomposition matrices (feature dependencies) + # feature affinity matrices (feature dependencies) # check that dimensions of pairwise feature matrices are equal to # of features, # and value ranges: for matrix, matrix_name in zip( @@ -31,9 +26,15 @@ def test_shap_decomposition_matrices( ("association", "synergy", "redundancy"), ): matrix_full_name = f"feature {matrix_name} matrix" - n_features = len(feature_names) + n_features = len(preprocessed_feature_names) assert matrix.values.shape[0] == n_features, f"rows in {matrix_full_name}" assert matrix.values.shape[1] == n_features, f"columns in {matrix_full_name}" + assert ( + set(matrix.names[0]) == preprocessed_feature_names + ), f"row names in {matrix_full_name}" + assert ( + set(matrix.names[1]) == preprocessed_feature_names + ), f"column names in {matrix_full_name}" # check values assert ( From 96898b9d16e6db4344784c799518d836cbf3ccae Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 11:26:52 +0200 Subject: [PATCH 041/106] TEST: remove obsolete helper 
functions --- test/test/facet/test_shap_decomposition.py | 66 +--------------------- 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/test/test/facet/test_shap_decomposition.py b/test/test/facet/test_shap_decomposition.py index cc5e826a..b07e8d8d 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -2,7 +2,7 @@ Test shap decomposition calculations """ import logging -from typing import Set, Union +from typing import Set import numpy as np @@ -40,67 +40,3 @@ def test_feature_affinity_matrices( assert ( np.nanmin(matrix.values) >= 0.0 and np.nanmax(matrix.values) <= 1.0 ), f"Values of [0.0, 1.0] in {matrix_full_name}" - - -# -# auxiliary functions -# - - -def cov(a: np.ndarray, b: np.ndarray) -> float: - """ - covariance, assuming a population mean of 0 - :param a: array of floats - :param b: array of floats - :return: covariance of a and b - """ - return (a * b).mean() - - -def var(a: np.ndarray) -> float: - """ - variance, assuming a population mean of 0 - :param a: array of floats - :return: variance of a - """ - return cov(a, a) - - -def std(a: np.ndarray) -> float: - """ - standard deviation, assuming a population mean of 0 - :param a: array of floats - :return: standard deviation of a - """ - return np.sqrt(var(a)) - - -def corr(a: np.ndarray, b: np.ndarray) -> float: - """ - pearson correlation, assuming a population mean of 0 - :param a: array of floats - :param b: array of floats - :return: pearson correlation of a and b - """ - return cov(a, b) / np.sqrt(var(a) * var(b)) - - -def print_list(*args, percentage: bool = False, **kwargs): - """ - print all arguments, including their names - :param args: the arguments to print (as their names, print integers indicating \ - the position) - :param percentage: if `true`, print all arguments as % values - :param kwargs: the named arguments to print - :return: - """ - - def _prt(_value, _name: Union[str, int]): - if percentage: - _value *= 100 - print(f"{_name}: {_value:.4g}{'%' if percentage else ''}") - - for name, arg in enumerate(args): - _prt(arg, _name=name) - for name, arg in kwargs.items(): - _prt(arg, _name=name) From f118803bafc5edf9c08777b2128e6788eba3bb48 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 19 Oct 2021 20:44:52 +0200 Subject: [PATCH 042/106] TEST: replace iris_classifier_model_binary by iris_classifier_binary --- test/test/facet/test_simulation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py index c32acd26..4f6e11fe 100644 --- a/test/test/facet/test_simulation.py +++ b/test/test/facet/test_simulation.py @@ -368,7 +368,7 @@ def test_univariate_uplift_subsample_simulation( def test_univariate_probability_simulation( - iris_classifier_model_binary: ClassifierPipelineDF[RandomForestClassifierDF], + iris_classifier_binary: ClassifierPipelineDF[RandomForestClassifierDF], iris_sample_binary: Sample, n_jobs: int, ) -> None: @@ -378,7 +378,7 @@ def test_univariate_probability_simulation( print(iris_sample_binary.feature_names) proba_simulator = UnivariateProbabilitySimulator( - model=iris_classifier_model_binary, + model=iris_classifier_binary, sample=iris_sample_binary, confidence_level=0.95, n_jobs=n_jobs, From b2c66357462da59f8222caab339df6dca03abeb1 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 22 Oct 2021 17:45:33 +0200 Subject: [PATCH 043/106] TEST: remove obsolete fixture iris_classifier_crossfit_binary --- test/test/conftest.py | 8 -------- 1 file 
changed, 8 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index c1571b52..db689e46 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -27,7 +27,6 @@ ) import facet -from facet.crossfit import LearnerCrossfit from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory from facet.selection import LearnerEvaluation, LearnerGrid, LearnerRanker @@ -372,13 +371,6 @@ def iris_classifier_binary( return iris_classifier_ranker_binary.best_model_ -@pytest.fixture -def iris_classifier_crossfit_binary( - iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], -) -> LearnerCrossfit[ClassifierPipelineDF[RandomForestClassifierDF]]: - return iris_classifier_ranker_binary.best_model_crossfit_ - - @pytest.fixture def iris_classifier_multi_class( iris_classifier_ranker_multi_class: LearnerRanker[ClassifierPipelineDF], From 202d75431f23f79b3ad65edc587151543fd1654f Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 22 Oct 2021 17:46:44 +0200 Subject: [PATCH 044/106] TEST: use strings not pandas indices to select ColumnTransformer columns --- test/test/conftest.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index db689e46..a4657574 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -1,5 +1,5 @@ import logging -from typing import Any, List, Mapping, Optional, Sequence, Set +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple import numpy as np import pandas as pd @@ -198,12 +198,16 @@ def regressor_inspector( def simple_preprocessor(sample: Sample) -> TransformerDF: features = sample.features - column_transforms = [] + column_transforms: List[Tuple[str, Any, Any]] = [] - numeric_columns = features.select_dtypes(np.number).columns + numeric_columns: pd.Index = features.select_dtypes(np.number).columns if numeric_columns is not None and len(numeric_columns) > 0: column_transforms.append( - (STEP_IMPUTE, SimpleImputerDF(strategy="median"), numeric_columns) + ( + STEP_IMPUTE, + SimpleImputerDF(strategy="median"), + list(map(str, numeric_columns)), + ) ) category_columns = features.select_dtypes(object).columns @@ -212,7 +216,7 @@ def simple_preprocessor(sample: Sample) -> TransformerDF: ( STEP_ONE_HOT_ENCODE, OneHotEncoderDF(sparse=False, handle_unknown="ignore"), - category_columns, + list(map(str, category_columns)), ) ) From 466afe259b00c2960d77a4e7f906ef1c18c98dd6 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 9 Nov 2021 12:54:26 +0100 Subject: [PATCH 045/106] API: implement class ParameterSpace and related classes --- src/facet/selection/__init__.py | 1 + src/facet/selection/_parameters.py | 412 ++++++++++++++++++++++++ src/facet/selection/base/__init__.py | 5 + src/facet/selection/base/_parameters.py | 95 ++++++ test/test/facet/test_selection.py | 133 +++++++- 5 files changed, 644 insertions(+), 2 deletions(-) create mode 100644 src/facet/selection/_parameters.py create mode 100644 src/facet/selection/base/__init__.py create mode 100644 src/facet/selection/base/_parameters.py diff --git a/src/facet/selection/__init__.py b/src/facet/selection/__init__.py index fa1de86f..fd6ae236 100644 --- a/src/facet/selection/__init__.py +++ b/src/facet/selection/__init__.py @@ -7,4 +7,5 @@ :class:`.LearnerRanker` selects the best pipeline and parametrization based on the pipeline and hyperparameter choices provided as a list of :class:`.LearnerGrid`. 
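+
+:class:`.ParameterSpace` spans a search space over a single pipeline, accepting
+parameter lists or `scipy.stats` distributions and validating parameter names
+against the estimator; a minimal sketch, assuming a regressor pipeline
+``pipeline``:
+
+.. code-block:: python
+
+    ps = ParameterSpace(pipeline)
+    ps.regressor.max_depth = randint(3, 10)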
""" +from ._parameters import * from ._selection import * diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py new file mode 100644 index 00000000..a43ea6fe --- /dev/null +++ b/src/facet/selection/_parameters.py @@ -0,0 +1,412 @@ +""" +Core implementation of :mod:`facet.selection` +""" + +import logging +import warnings +from typing import ( + Any, + Collection, + Dict, + Generic, + Iterable, + Iterator, + List, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) + +from scipy import stats +from sklearn.base import BaseEstimator +from sklearn.pipeline import Pipeline + +from pytools.api import AllTracker, inheritdoc, subsdoc, to_list, validate_element_types +from pytools.expression import Expression, make_expression +from pytools.expression.atomic import Id +from sklearndf import EstimatorDF +from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF + +from .base import BaseParameterSpace + +log = logging.getLogger(__name__) + +__all__ = [ + "MultiClassifierParameterSpace", + "MultiEstimatorParameterSpace", + "MultiRegressorParameterSpace", + "ParameterSpace", +] + + +# +# Type constants +# + +ParameterSet = Union[List[Any], stats.rv_continuous, stats.rv_discrete] +ParameterDict = Dict[str, ParameterSet] + +rv_frozen = type(stats.uniform()) +assert rv_frozen.__name__ == "rv_frozen" + + +# +# Type variables +# + +T_Estimator = TypeVar("T_Estimator", bound=BaseEstimator) + + +# +# Ensure all symbols introduced below are included in __all__ +# + +__tracker = AllTracker(globals()) + + +# +# Class definitions +# + + +@inheritdoc(match="""[see superclass]""") +class ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]): + # noinspection SpellCheckingInspection + """ + A set of parameters spanning a parameter space for optimizing the hyper-parameters + of a single estimator. + + Parameter spaces provide an easy approach to define and validate search spaces + for hyper-parameter tuning of ML pipelines using `scikit-learn`'s + :class:`~sklearn.model_selection.GridSearchCV` and + :class:`~sklearn.model_selection.RandomizedSearchCV`. + + Parameter lists or distributions to be searched can be set using attribute access, + and will be validated for correct names and values. + + Example: + + .. code-block:: python + + ps = ParameterSpace( + RegressorPipelineDF( + regressor=RandomForestRegressorDF(random_state=42), + preprocessing=simple_preprocessor, + ) + ) + ps.regressor.min_weight_fraction_leaf = scipy.stats.loguniform(0.01, 0.1) + ps.regressor.max_depth = [3, 4, 5, 7, 10] + + cv = RandomizedSearchCV( + estimator=ps.estimator, + param_distributions=ps.parameters, + # ... 
+            )
+
+            # the following will raise an AttributeError for unknown attribute xyz:
+            ps.regressor.xyz = [3, 4, 5, 7, 10]
+
+            # the following will raise a TypeError because we do not assign a list or \
+distribution:
+            ps.regressor.max_depth = 3
+
+    """
+
+    def __init__(self, estimator: T_Estimator) -> None:
+        """
+        :param estimator: the estimator to which to apply the parameters
+        """
+
+        super().__init__(estimator=estimator)
+
+        params: Dict[str, Any] = {
+            name: param
+            for name, param in estimator.get_params(deep=True).items()
+            if "__" not in name
+        }
+
+        self._children: Dict[str, ParameterSpace] = {
+            name: ParameterSpace(estimator=value)
+            for name, value in params.items()
+            if isinstance(value, BaseEstimator)
+        }
+        self._values: ParameterDict = {}
+        self._params: Set[str] = set(params.keys())
+
+    @subsdoc(
+        pattern="or a list of such dictionaries, ",
+        replacement="",
+        using=BaseParameterSpace.get_parameters,
+    )
+    def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict:
+        """[see superclass]"""
+
+        return {
+            "__".join(name): values
+            for (name, values) in self._iter_parameters([prefix] if prefix else [])
+        }
+
+    def _validate_parameter(self, name: str, value: ParameterSet) -> None:
+
+        if name not in self._params:
+            raise AttributeError(
+                f"unknown parameter name for {type(self.estimator).__name__}: {name}"
+            )
+
+        if not (
+            isinstance(
+                value,
+                (list, stats.rv_discrete, stats.rv_continuous),
+            )
+            or callable(getattr(value, "rvs", None))
+        ):
+            raise TypeError(
+                f"expected list or distribution for parameter {name} but got: "
+                f"{value!r}"
+            )
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name.startswith("_"):
+            super().__setattr__(name, value)
+        else:
+            self._validate_parameter(name, value)
+            if name in self.__dict__:
+                warnings.warn(
+                    f"parameter {name!r} overrides {type(self).__name__} "
+                    f"attribute of the same name",
+                    stacklevel=2,
+                )
+            self._values[name] = value
+
+    def __dir__(self) -> Iterable[str]:
+        return {*super().__dir__(), *self._params}
+
+    def __getattr__(self, key: str) -> Any:
+        if not key.startswith("_"):
+
+            result = self._children.get(key, None)
+            if result is not None:
+                return result
+
+            result = self._values.get(key, None)
+            if result is not None:
+                return result
+
+        return super().__getattribute__(key)
+
+    def __iter__(self) -> Iterator[Tuple[List[str], ParameterSet]]:
+        return self._iter_parameters([])
+
+    def _iter_parameters(
+        self, path_prefix: List[str]
+    ) -> Iterator[Tuple[List[str], ParameterSet]]:
+
+        yield from (
+            ([*path_prefix, name], value) for name, value in self._values.items()
+        )
+
+        for name, child in self._children.items():
+            yield from child._iter_parameters([*path_prefix, name])
+
+    def to_expression(self) -> Expression:
+        """[see superclass]"""
+        return self._to_expression([])
+
+    def _to_expression(self, path_prefix: Union[str, List[str]]) -> Expression:
+        # path_prefix: the path prefix to prepend to each parameter name
+
+        def _values_to_expression(values: ParameterSet) -> Expression:
+            if isinstance(values, rv_frozen):
+                values: rv_frozen
+                return Id(values.dist.name)(*values.args, **values.kwds)
+            elif isinstance(values, (stats.rv_continuous, stats.rv_discrete)):
+                try:
+                    return Id(values.name)(values.a, values.b)
+                except AttributeError:
+                    pass
+
+            return make_expression(values)
+
+        path_prefix = (
+            []
+            if path_prefix is None
+            else to_list(path_prefix, element_type=str, arg_name="path_prefix")
+        )
+
+        parameters = {
+            ".".join(path): _values_to_expression(value)
+            for path, value in
self._iter_parameters(path_prefix=path_prefix)
+        }
+
+        if path_prefix:
+            return Id(type(self))(
+                **{".".join(path_prefix): self.estimator}, **parameters
+            )
+        else:
+            return Id(type(self))(self.estimator, **parameters)
+
+
+@inheritdoc(match="""[see superclass]""")
+class MultiEstimatorParameterSpace(
+    BaseParameterSpace[T_Estimator], Generic[T_Estimator]
+):
+    """
+    A collection of parameter spaces, each representing a competing estimator, from
+    which to select the best-performing candidate with optimal hyper-parameters.
+
+    See :class:`.ParameterSpace` for documentation on how to set up and use parameter
+    spaces.
+    """
+
+    STEP_CANDIDATE = "candidate"
+
+    #: The estimator base type which all candidate estimators must implement.
+    estimator_type: Type[T_Estimator]
+
+    def __init__(
+        self,
+        *candidates: ParameterSpace[T_Estimator],
+        estimator_type: Type[T_Estimator],
+    ) -> None:
+        """
+        :param candidates: the parameter spaces from which to select the best estimator
+        :param estimator_type: the estimator base type which all candidate estimators
+            must implement
+        """
+        validate_element_types(candidates, expected_type=ParameterSpace)
+        validate_candidates(candidates, expected_estimator_type=estimator_type)
+
+        if len(candidates) == 0:
+            raise TypeError("no parameter space passed; need to pass at least one")
+
+        if all(
+            isinstance(candidate.estimator, EstimatorDF) for candidate in candidates
+        ):
+            cls_pipeline = PipelineDF
+        else:
+            cls_pipeline = Pipeline
+
+        super().__init__(
+            estimator=cls_pipeline(
+                [(MultiEstimatorParameterSpace.STEP_CANDIDATE, candidates[0].estimator)]
+            )
+        )
+
+        self.candidates = candidates
+        self.estimator_type = estimator_type
+
+    @subsdoc(
+        pattern=(
+            r"a dictionary of parameter distributions,[\n\s]*"
+            r"or a list of such dictionaries"
+        ),
+        replacement="a list of dictionaries of parameter distributions",
+        using=BaseParameterSpace.get_parameters,
+    )
+    def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]:
+        """[see superclass]"""
+        return [
+            {
+                MultiEstimatorParameterSpace.STEP_CANDIDATE: [candidate.estimator],
+                **candidate.get_parameters(
+                    prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE
+                ),
+            }
+            for candidate in self.candidates
+        ]
+
+    def to_expression(self) -> "Expression":
+        """[see superclass]"""
+        # noinspection PyProtectedMember
+        return Id(type(self))(
+            self.estimator,
+            [
+                candidate._to_expression(
+                    path_prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE
+                )
+                for candidate in self.candidates
+            ],
+        )
+
+
+@subsdoc(pattern="a competing estimator", replacement="a competing regressor pipeline")
+@inheritdoc(match="""[see superclass]""")
+class MultiRegressorParameterSpace(MultiEstimatorParameterSpace[RegressorPipelineDF]):
+    """[see superclass]"""
+
+    def __init__(
+        self,
+        *candidates: ParameterSpace[RegressorPipelineDF],
+        estimator_type: Type[RegressorPipelineDF] = RegressorPipelineDF,
+    ) -> None:
+        """[see superclass]"""
+        ensure_subclass(estimator_type, RegressorPipelineDF)
+        super().__init__(*candidates, estimator_type=estimator_type)
+
+
+@subsdoc(pattern="a competing estimator", replacement="a competing classifier pipeline")
+@inheritdoc(match="""[see superclass]""")
+class MultiClassifierParameterSpace(MultiEstimatorParameterSpace[ClassifierPipelineDF]):
+    """[see superclass]"""
+
+    def __init__(
+        self,
+        *candidates: ParameterSpace[ClassifierPipelineDF],
+        estimator_type: Type[ClassifierPipelineDF] = ClassifierPipelineDF,
+    ) -> None:
+        """[see superclass]"""
+        ensure_subclass(estimator_type,
ClassifierPipelineDF) + super().__init__(*candidates, estimator_type=estimator_type) + + +__tracker.validate() + + +# +# auxiliary functions +# + + +def ensure_subclass( + estimator_type: Type[T_Estimator], expected_type: Type[T_Estimator] +) -> None: + """ + Ensure that the given estimator type is a subclass of the expected estimator type. + + :param estimator_type: the estimator type to validate + :param expected_type: the expected estimator type + """ + if not issubclass(estimator_type, expected_type): + raise TypeError( + f"arg estimator_type must be a subclass of {expected_type.__name__} " + f"but is: {estimator_type.__name__}" + ) + + +def validate_candidates( + candidates: Collection[ParameterSpace[T_Estimator]], + expected_estimator_type: Type[T_Estimator], +) -> None: + """ + Ensure that all candidates implement a given estimator type. + + :param candidates: the candidates to check + :param expected_estimator_type: the type that all candidates' estimators must + implement + """ + + non_compliant_candidate_estimators: Set[str] = { + type(candidate.estimator).__name__ + for candidate in candidates + if not isinstance(candidate.estimator, expected_estimator_type) + } + if non_compliant_candidate_estimators: + raise TypeError( + f"all candidate estimators must be instances of " + f"{expected_estimator_type.__name__}, " + f"but candidate estimators include: " + f"{', '.join(non_compliant_candidate_estimators)}" + ) diff --git a/src/facet/selection/base/__init__.py b/src/facet/selection/base/__init__.py new file mode 100644 index 00000000..40af413e --- /dev/null +++ b/src/facet/selection/base/__init__.py @@ -0,0 +1,5 @@ +""" +Base classes for module :mod:`facet.selection`. +""" + +from ._parameters import * diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py new file mode 100644 index 00000000..0b311a58 --- /dev/null +++ b/src/facet/selection/base/_parameters.py @@ -0,0 +1,95 @@ +""" +Core implementation of :mod:`facet.selection.base` +""" + +import logging +from abc import ABCMeta, abstractmethod +from typing import Any, Dict, Generic, List, Optional, TypeVar, Union + +from scipy import stats +from sklearn.base import BaseEstimator + +from pytools.api import AllTracker +from pytools.expression import HasExpressionRepr + +log = logging.getLogger(__name__) + +__all__ = [ + "BaseParameterSpace", +] + + +# +# Type constants +# + +ParameterDict = Dict[str, Union[List[Any], stats.rv_continuous, stats.rv_discrete]] + + +# +# Type variables +# + +T_Estimator = TypeVar("T_Estimator") + + +# +# Ensure all symbols introduced below are included in __all__ +# + +__tracker = AllTracker(globals()) + + +# +# Class definitions +# + + +class BaseParameterSpace(HasExpressionRepr, Generic[T_Estimator], metaclass=ABCMeta): + """ + A collection of parameters spanning a parameter space for hyper-parameter + optimization. + """ + + def __init__(self, estimator: T_Estimator) -> None: + """ + :param estimator: the estimator for which to capture parameters + """ + self._estimator: BaseEstimator = estimator + + @property + def estimator(self) -> T_Estimator: + """ + The estimator associated with this parameter space. + """ + return self._estimator + + @property + def parameters(self) -> Union[List[ParameterDict], ParameterDict]: + """ + The parameter sets spanning this parameter space. + + This is a shortcut for calling method :meth:`.get_parameters` with no + arguments. 
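+
+        A minimal sketch, assuming a :class:`ParameterSpace` ``ps`` over a regressor
+        pipeline:
+
+        .. code-block:: python
+
+            ps.regressor.max_depth = [3, 5]
+            ps.parameters  # {"regressor__max_depth": [3, 5]}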
+ """ + return self.get_parameters() + + @abstractmethod + def get_parameters( + self, prefix: Optional[str] = None + ) -> Union[List[ParameterDict], ParameterDict]: + """ + Generate a dictionary of parameter distributions, + or a list of such dictionaries, compatible with `scikit-learn`'s + :class:`~sklearn.model_selection.GridSearchCV` and + :class:`~sklearn.model_selection.RandomizedSearchCV`. + + :param prefix: an optional path prefix to prepend to all paths in the resulting + dictionary + :return: a dictionary mapping paths to estimator parameters to parameter + distributions + """ + pass + + +__tracker.validate() diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 94584d32..fd3e1ca5 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -8,10 +8,14 @@ import numpy as np import pandas as pd import pytest +from scipy.stats import loguniform, randint, zipfian from sklearn import datasets +from pytools.expression import freeze +from pytools.expression.atomic import Id +from sklearndf import TransformerDF from sklearndf.classification import SVCDF -from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF +from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF from sklearndf.regression import ( AdaBoostRegressorDF, LinearRegressionDF, @@ -22,7 +26,14 @@ from ..conftest import check_ranking from facet.crossfit import LearnerCrossfit from facet.data import Sample -from facet.selection import LearnerEvaluation, LearnerGrid, LearnerRanker +from facet.selection import ( + LearnerEvaluation, + LearnerGrid, + LearnerRanker, + MultiClassifierParameterSpace, + MultiRegressorParameterSpace, + ParameterSpace, +) from facet.validation import BootstrapCV log = logging.getLogger(__name__) @@ -183,3 +194,121 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: assert ( model_ranker.ranking_[0].ranking_score >= 0.8 ), "expected a best performance of at least 0.8" + + +def test_parameter_space( + sample: Sample, simple_preprocessor: TransformerDF, n_jobs: int +) -> None: + + # distributions + + randint_3_10 = randint(3, 10) + loguniform_0_01_0_10 = loguniform(0.01, 0.1) + loguniform_0_05_0_10 = loguniform(0.05, 0.1) + zipfian_1_32 = zipfian(1.0, 32) + + # parameter space 1 + + pipeline_1 = RegressorPipelineDF( + regressor=RandomForestRegressorDF(random_state=42), + preprocessing=simple_preprocessor, + ) + ps_1 = ParameterSpace(pipeline_1) + ps_1.regressor.min_weight_fraction_leaf = loguniform_0_01_0_10 + ps_1.regressor.max_depth = randint_3_10 + ps_1.regressor.min_samples_leaf = loguniform_0_05_0_10 + + with pytest.raises( + AttributeError, + match=r"^unknown parameter name for RandomForestRegressorDF: unknown$", + ): + ps_1.regressor.unknown = 1 + + with pytest.raises( + TypeError, + match=( + "^expected list or distribution for parameter min_samples_leaf " + "but got: 1$" + ), + ): + ps_1.regressor.min_samples_leaf = 1 + + # parameter space 2 + + pipeline_2 = RegressorPipelineDF( + regressor=LGBMRegressorDF(random_state=42), + preprocessing=simple_preprocessor, + ) + ps_2 = ParameterSpace(pipeline_2) + ps_2.regressor.max_depth = randint_3_10 + ps_2.regressor.min_child_samples = zipfian_1_32 + + # multi parameter space + + with pytest.raises( + TypeError, + match=( + r"^arg estimator_type must be a subclass of ClassifierPipelineDF but is: " + r"RegressorPipelineDF$" + ), + ): + # noinspection PyTypeChecker + MultiClassifierParameterSpace(ps_1, ps_2, 
estimator_type=RegressorPipelineDF) + + with pytest.raises( + TypeError, + match=( + r"^all candidate estimators must be instances of ClassifierPipelineDF, " + r"but candidate estimators include: RegressorPipelineDF$" + ), + ): + # noinspection PyTypeChecker + MultiClassifierParameterSpace(ps_1, ps_2) + + mps = MultiRegressorParameterSpace(ps_1, ps_2) + + # test + + assert freeze(mps.to_expression()) == freeze( + Id.MultiRegressorParameterSpace( + Id.PipelineDF(steps=[("candidate", pipeline_1.to_expression())]), + [ + Id.ParameterSpace( + candidate=pipeline_1.to_expression(), + **{ + "candidate.regressor.min_weight_fraction_leaf": ( + Id.loguniform(0.01, 0.1) + ), + "candidate.regressor.max_depth": Id.randint(3, 10), + "candidate.regressor.min_samples_leaf": ( + Id.loguniform(0.05, 0.1) + ), + }, + ), + Id.ParameterSpace( + candidate=pipeline_2.to_expression(), + **{ + "candidate.regressor.max_depth": Id.randint(3, 10), + "candidate.regressor.min_child_samples": Id.zipfian(1.0, 32), + }, + ), + ], + ) + ) + + assert type(mps.estimator) == PipelineDF + assert mps.estimator.steps == [("candidate", pipeline_1)] + + assert mps.parameters == [ + { + "candidate": [pipeline_1], + "candidate__regressor__max_depth": randint_3_10, + "candidate__regressor__min_samples_leaf": loguniform_0_05_0_10, + "candidate__regressor__min_weight_fraction_leaf": loguniform_0_01_0_10, + }, + { + "candidate": [pipeline_2], + "candidate__regressor__max_depth": randint_3_10, + "candidate__regressor__min_child_samples": zipfian_1_32, + }, + ] From 95ec39f9a0a486c9fca176414891e9bd1aff8073 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 19 Jan 2022 18:18:55 +0100 Subject: [PATCH 046/106] add initial version of CrossFit-free LearnerRanker --- src/facet/selection/_selection.py | 295 +++++++++++++++++++++++- src/facet/selection/base/_parameters.py | 2 +- test/test/conftest.py | 81 ++++++- test/test/facet/test_selection.py | 41 ++++ 4 files changed, 413 insertions(+), 6 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e27a3949..989bd265 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -1,12 +1,15 @@ """ Core implementation of :mod:`facet.selection` """ - +import inspect +import itertools import logging import math import operator +import re from functools import reduce from itertools import chain +from re import Pattern from types import MappingProxyType from typing import ( Any, @@ -28,9 +31,9 @@ import numpy as np import pandas as pd from numpy.random.mtrand import RandomState -from sklearn.model_selection import BaseCrossValidator +from sklearn.model_selection import BaseCrossValidator, GridSearchCV -from pytools.api import AllTracker, inheritdoc, to_tuple +from pytools.api import AllTracker, deprecated, inheritdoc, to_tuple from pytools.fit import FittableMixin from pytools.parallelization import JobRunner, ParallelizableMixin from sklearndf.pipeline import ( @@ -41,10 +44,23 @@ from facet.crossfit import LearnerCrossfit from facet.data import Sample +from facet.selection.base import BaseParameterSpace log = logging.getLogger(__name__) -__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"] +__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker", "LearnerRanker2"] + +# +# Type constants +# + +# sklearn does not publish base class BaseSearchCV, so we pull it from the MRO +# of GridSearchCV +BaseSearchCV = [ + base_class + for base_class in GridSearchCV.mro() + if base_class.__name__ == "BaseSearchCV" +][0] # 
# Type variables @@ -56,6 +72,13 @@ ) T_RegressorPipelineDF = TypeVar("T_RegressorPipelineDF", bound=RegressorPipelineDF) T_ClassifierPipelineDF = TypeVar("T_ClassifierPipelineDF", bound=ClassifierPipelineDF) +T_SearchCV = TypeVar("T_SearchCV", bound=BaseSearchCV) + +# +# Constants +# + +ARG_SAMPLE_WEIGHT = "sample_weight" # # Ensure all symbols introduced below are included in __all__ @@ -69,11 +92,273 @@ # +@inheritdoc(match="[see superclass]") +class LearnerRanker2( + FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF, T_SearchCV] +): + """ + Score and rank different parametrizations of one or more learners, + using cross-validation. + + The learner ranker can run a simultaneous grid search across multiple alternative + learner pipelines, supporting the ability to simultaneously select a learner + algorithm and optimize hyper-parameters. + """ + + #: The searcher used to fit this LearnerRanker; ``None`` if not fitted. + searcher_: Optional[T_SearchCV] + + _CV_RESULT_COLUMNS = [ + r"mean_test_\w+", + r"std_test_\w+", + r"param_\w+", + r"(rank|mean|std)_\w+", + ] + + # noinspection PyTypeChecker + _CV_RESULT_PATTERNS: List[Pattern] = list(map(re.compile, _CV_RESULT_COLUMNS)) + _DEFAULT_REPORT_SORT_COLUMN = "rank_test_score" + + def __init__( + self, + searcher_factory: Callable[..., T_SearchCV], + parameter_space: BaseParameterSpace, + *, + cv: Optional[BaseCrossValidator] = None, + scoring: Union[str, Callable[[float, float], float], None] = None, + random_state: Union[int, RandomState, None] = None, + n_jobs: Optional[int] = None, + shared_memory: Optional[bool] = None, + pre_dispatch: Optional[Union[str, int]] = None, + verbose: Optional[int] = None, + **searcher_params: Any, + ) -> None: + """ + :param searcher_factory: a cross-validation searcher class, or any other + callable that instantiates a cross-validation searcher + :param parameter_space: the parameter space to search + :param cv: a cross validator (e.g., + :class:`.BootstrapCV`) + :param scoring: a scoring function (by name, or as a callable) for evaluating + learners (optional; use learner's default scorer if not specified here). 
+            If passing a callable, the ``"score"`` will be used as the name of the
+            scoring function unless the callable defines a ``__name__`` attribute
+        :param random_state: optional random seed or random state for shuffling the
+            feature column order
+        %%PARALLELIZABLE_PARAMS%%
+        :param searcher_params: additional parameters to be passed on to the searcher;
+            must not include the first two positional arguments of the searcher
+            constructor used to pass the estimator and the search space, since these
+            will be populated using arg parameter_space
+        """
+        super().__init__(
+            n_jobs=n_jobs,
+            shared_memory=shared_memory,
+            pre_dispatch=pre_dispatch,
+            verbose=verbose,
+        )
+
+        self.searcher_factory = searcher_factory
+        self.parameter_space = parameter_space
+        self.cv = cv
+        self.scoring = scoring
+        self.random_state = random_state
+        self.searcher_params = searcher_params
+
+        #
+        # validate parameters for the searcher factory
+        #
+
+        searcher_factory_params = inspect.signature(searcher_factory).parameters.keys()
+
+        # raise an error if the searcher params include the searcher's first two
+        # positional arguments
+        reserved_params = set(itertools.islice(searcher_factory_params, 2))
+
+        reserved_params_overrides = reserved_params.intersection(searcher_params.keys())
+
+        if reserved_params_overrides:
+            raise ValueError(
+                "arg searcher_params must not include the first two positional "
+                "arguments of arg searcher_factory, but included: "
+                + ", ".join(reserved_params_overrides)
+            )
+
+        # raise an error if the searcher does not support any of the given parameters
+        unsupported_params = set(self._get_searcher_parameters().keys()).difference(
+            searcher_factory_params
+        )
+
+        if unsupported_params:
+            raise TypeError(
+                "parameters not supported by arg searcher_factory: "
+                + ", ".join(unsupported_params)
+            )
+
+        self.searcher_ = None
+
+    __init__.__doc__ = __init__.__doc__.replace(
+        "%%PARALLELIZABLE_PARAMS%%", ParallelizableMixin.__init__.__doc__.strip()
+    )
+
+    @property
+    def is_fitted(self) -> bool:
+        """[see superclass]"""
+        return self.searcher_ is not None
+
+    @property
+    def best_estimator_(self) -> T_LearnerPipelineDF:
+        """
+        The pipeline which obtained the best ranking score, fitted on the entire sample.
+        """
+        self._ensure_fitted()
+        searcher = self.searcher_
+        if searcher.refit:
+            return searcher.best_estimator_
+        else:
+            raise AttributeError(
+                "best_estimator_ is not defined; use a CV searcher with refit=True"
+            )
+
+    def fit(
+        self: T_Self,
+        sample: Sample,
+        groups: Union[pd.Series, np.ndarray, Sequence, None] = None,
+        **fit_params: Any,
+    ) -> T_Self:
+        """
+        Rank the candidate learners and their hyper-parameter combinations, using
+        cross-validation on the given sample.
+
+        Unlike the scikit-learn implementation of grid search, arbitrary parameters
+        can be passed on to the learner pipeline(s) to be fitted.
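+
+        Example (a minimal sketch, assuming a parameter space ``ps`` and a sample
+        ``sample``):
+
+        .. code-block:: python
+
+            ranker = LearnerRanker2(
+                searcher_factory=GridSearchCV,
+                parameter_space=ps,
+                cv=KFold(n_splits=5),
+                scoring="r2",
+            ).fit(sample=sample)
+
+            ranker.summary_report()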
+
+        :param sample: the sample to fit the candidate learner pipelines to
+        :param groups: group labels for the observations, passed on to the searcher's
+            ``fit`` method (optional)
+        :param fit_params: any fit parameters to pass on to the learner's fit method
+        :return: ``self``
+        """
+        # support type hinting in PyCharm
+        self: LearnerRanker2[T_LearnerPipelineDF, T_SearchCV]
+
+        self._reset_fit()
+
+        if ARG_SAMPLE_WEIGHT in fit_params:
+            raise ValueError(
+                "arg sample_weight is not supported, use arg sample.weight instead"
+            )
+
+        if isinstance(groups, pd.Series):
+            if not groups.index.equals(sample.index):
+                raise ValueError(
+                    "index of arg groups is not equal to index of arg sample"
+                )
+        elif groups is not None:
+            if len(groups) != len(sample):
+                raise ValueError(
+                    "length of arg groups is not equal to length of arg sample"
+                )
+
+        parameter_space = self.parameter_space
+        searcher: BaseSearchCV
+        searcher = self.searcher_ = self.searcher_factory(
+            parameter_space.estimator,
+            parameter_space.parameters,
+            **self._get_searcher_parameters(),
+        )
+        if sample.weight is not None:
+            fit_params = {ARG_SAMPLE_WEIGHT: sample.weight, **fit_params}
+
+        searcher.fit(X=sample.features, y=sample.target, groups=groups, **fit_params)
+
+        return self
+
+    def summary_report(self, *, sort_by: Optional[str] = None) -> pd.DataFrame:
+        """
+        Create a summary table of the scores achieved by all learners in the grid
+        search, sorted with the best-performing learners first.
+
+        :param sort_by: name of the column to sort the report by, in ascending order,
+            if the column is present (default: ``"%%SORT_COLUMN%%"``)
+
+        :return: the summary report of the grid search as a data frame
+        """
+
+        self._ensure_fitted()
+
+        if sort_by is None:
+            sort_by = self._DEFAULT_REPORT_SORT_COLUMN
+
+        cv_results: Dict[str, Any] = self.searcher_.cv_results_
+
+        # we create a table using a subset of the cv results, to keep the report
+        # relevant and readable
+        cv_results_subset: Dict[str, np.ndarray] = {}
+
+        # add the sorting column as the leftmost column of the report
+        sort_results = sort_by in cv_results
+        if sort_results:
+            cv_results_subset[sort_by] = cv_results[sort_by]
+
+        # add all other columns that match any of the pre-defined patterns
+        for pattern in self._CV_RESULT_PATTERNS:
+            cv_results_subset.update(
+                {
+                    name: values
+                    for name, values in cv_results.items()
+                    if name not in cv_results_subset and pattern.fullmatch(name)
+                }
+            )
+
+        # convert the results into a data frame and sort
+        report = pd.DataFrame(cv_results_subset)
+
+        # split column headers containing one or more "__",
+        # resulting in a column MultiIndex
+        report.columns = report.columns.str.split("__", expand=True).map(
+            lambda column: tuple(level if pd.notna(level) else "" for level in column)
+        )
+
+        # sort the report, if applicable
+        if sort_results:
+            report = report.sort_values(by=sort_by)
+
+        return report
+
+    def _reset_fit(self) -> None:
+        # make this object not fitted
+        self.searcher_ = None
+
+    def _get_searcher_parameters(self) -> Dict[str, Any]:
+        # make a dict of all parameters to be passed to the searcher
+        return {
+            **{
+                k: v
+                for k, v in dict(
+                    cv=self.cv,
+                    scoring=self.scoring,
+                    random_state=self.random_state,
+                    n_jobs=self.n_jobs,
+                    shared_memory=self.shared_memory,
+                    pre_dispatch=self.pre_dispatch,
+                    verbose=self.verbose,
+                ).items()
+                if v is not None
+            },
+            **self.searcher_params,
+        }
+
+    summary_report.__doc__ = summary_report.__doc__.replace(
+        "%%SORT_COLUMN%%", _DEFAULT_REPORT_SORT_COLUMN
+    )
+
+
 class LearnerGrid(Generic[T_LearnerPipelineDF]):
     """
     A grid of hyper-parameters for tuning a learner pipeline.
""" + @deprecated(message=f"use class {LearnerRanker2.__name__} instead") def __init__( self, pipeline: T_LearnerPipelineDF, @@ -196,6 +481,7 @@ class LearnerEvaluation(Generic[T_LearnerPipelineDF]): __slots__ = ["pipeline", "parameters", "scoring_name", "scores", "ranking_score"] + @deprecated(message=f"use class {LearnerRanker2.__name__} instead") def __init__( self, pipeline: T_LearnerPipelineDF, @@ -246,6 +532,7 @@ class LearnerRanker( algorithm and optimize hyper-parameters. """ + @deprecated(message=f"use class {LearnerRanker2.__name__} instead") def __init__( self, grids: Union[ diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index 0b311a58..3670fa14 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -30,7 +30,7 @@ # Type variables # -T_Estimator = TypeVar("T_Estimator") +T_Estimator = TypeVar("T_Estimator", bound=BaseEstimator) # diff --git a/test/test/conftest.py b/test/test/conftest.py index a4657574..07233307 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -29,7 +29,13 @@ import facet from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory -from facet.selection import LearnerEvaluation, LearnerGrid, LearnerRanker +from facet.selection import ( + LearnerEvaluation, + LearnerGrid, + LearnerRanker, + MultiRegressorParameterSpace, + ParameterSpace, +) from facet.validation import BootstrapCV, StratifiedBootstrapCV logging.basicConfig(level=logging.DEBUG) @@ -144,6 +150,79 @@ def regressor_grids(simple_preprocessor: TransformerDF) -> List[LearnerGrid]: ] +@pytest.fixture +def regressor_parameters( + simple_preprocessor: TransformerDF, +) -> MultiRegressorParameterSpace: + random_state = {"random_state": 42} + + space_1 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, regressor=LGBMRegressorDF(**random_state) + ) + ) + space_1.regressor.max_depth = [5, 10] + space_1.regressor.min_split_gain = [0.1, 0.2] + space_1.regressor.num_leaves = [50, 100, 200] + + space_2 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, + regressor=AdaBoostRegressorDF(**random_state), + ) + ) + space_2.regressor.n_estimators = [50, 80] + + space_3 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, + regressor=RandomForestRegressorDF(**random_state), + ) + ) + space_3.regressor.n_estimators = [50, 80] + + space_4 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, + regressor=DecisionTreeRegressorDF(**random_state), + ) + ) + space_4.regressor.max_depth = [0.5, 1.0] + space_4.regressor.max_features = [0.5, 1.0] + + space_5 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, + regressor=ExtraTreeRegressorDF(**random_state), + ) + ) + space_5.regressor.max_depth = [5, 10, 12] + + space_6 = ParameterSpace( + RegressorPipelineDF(preprocessing=simple_preprocessor, regressor=SVRDF()) + ) + space_6.regressor.gamma = [0.5, 1] + space_6.regressor.C = [50, 100] + + space_7 = ParameterSpace( + RegressorPipelineDF( + preprocessing=simple_preprocessor, regressor=LinearRegressionDF() + ) + ) + space_7.regressor.normalize = [False, True] + + return MultiRegressorParameterSpace( + space_1, + space_2, + space_3, + space_4, + space_5, + space_6, + space_7, + estimator_type=RegressorPipelineDF, + ) + + @pytest.fixture def regressor_ranker( cv_kfold: KFold, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py 
index fd3e1ca5..497aca68 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -10,6 +10,7 @@ import pytest from scipy.stats import loguniform, randint, zipfian from sklearn import datasets +from sklearn.model_selection import GridSearchCV from pytools.expression import freeze from pytools.expression.atomic import Id @@ -34,6 +35,7 @@ MultiRegressorParameterSpace, ParameterSpace, ) +from facet.selection._selection import LearnerRanker2 from facet.validation import BootstrapCV log = logging.getLogger(__name__) @@ -312,3 +314,42 @@ def test_parameter_space( "candidate__regressor__min_child_samples": zipfian_1_32, }, ] + + +def test_learner_ranker( + regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int +) -> None: + + # define the circular cross validator with just 5 splits (to speed up testing) + cv = BootstrapCV(n_splits=5, random_state=42) + + with pytest.raises( + ValueError, + match=( + "arg searcher_params must not include the first two positional arguments " + "of arg searcher_factory, but included: param_grid" + ), + ): + LearnerRanker2(GridSearchCV, regressor_parameters, param_grid=None) + + ranker: LearnerRanker2[RegressorPipelineDF] = LearnerRanker2( + GridSearchCV, + regressor_parameters, + scoring="r2", + cv=cv, + n_jobs=n_jobs, + ).fit(sample=sample) + + assert isinstance(ranker.best_estimator_, PipelineDF) + + report_df = ranker.summary_report() + log.debug(report_df.columns.tolist()) + log.debug(f"\n{report_df}") + + assert len(report_df) > 0 + assert isinstance(report_df, pd.DataFrame) + + scores_sr: pd.Series = report_df.loc[:, "mean_test_score"] + assert all( + score_hi >= score_lo for score_hi, score_lo in zip(scores_sr, scores_sr[1:]) + ) From c76704fbd71fd73c7cc0dfb94c980ddf1404211d Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 20 Jan 2022 10:07:19 +0100 Subject: [PATCH 047/106] API: provide all univariate simulation results as a data frame --- src/facet/simulation/_simulation.py | 89 ++++++++++++----------------- src/facet/simulation/viz/_draw.py | 69 +++++++++++----------- test/test/facet/test_simulation.py | 40 +++++++------ 3 files changed, 97 insertions(+), 101 deletions(-) diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py index 15137d3a..d2eebf4f 100644 --- a/src/facet/simulation/_simulation.py +++ b/src/facet/simulation/_simulation.py @@ -63,24 +63,22 @@ class UnivariateSimulationResult(Generic[T_Partition]): Summary result of a univariate simulation. """ + #: The simulation result as a data frame, indexed by the central values of the + #: partitions for which the simulation was run, with the following columns: + #: + #: - :attr:`.COL_MEAN`: the mean predictions for the simulated values + #: - :attr:`.COL_SEM`: the standard errors of the mean predictions + #: - :attr:`.COL_LOWER_BOUND`: the lower bounds of the confidence intervals for the + #: simulation outcomes, based on mean, standard error of the mean, and + #: :attr:`confidence_level` + #: - :attr:`.COL_UPPER_BOUND`: the upper bounds of the confidence intervals for the + #: simulation outcomes, based on mean, standard error of the mean, and + #: :attr:`confidence_level` + data: pd.DataFrame + #: The partitioner used to generate feature values to be simulated. partitioner: Partitioner - #: The mean predictions for the values representing each partition. - mean: pd.Series - - #: The standard errors of the mean predictions for the values representing each - # partition. 
- sem: pd.Series - - #: The lower bounds of the confidence intervals for the mean predictions for the - # values representing each partition. - lower_bound: pd.Series - - #: The upper bounds of the confidence intervals for the mean predictions for the - # values representing each partition. - upper_bound: pd.Series - #: Name of the simulated feature. feature_name: str @@ -160,48 +158,30 @@ def __init__( "in the range between 0.0 and 1.0 (exclusive)" ) - idx = pd.Index( - partitioner.partitions_, name=UnivariateSimulationResult.IDX_PARTITION - ) - self.partitioner = partitioner - self.mean = pd.Series(mean, index=idx, name=UnivariateSimulationResult.COL_MEAN) - self.sem = pd.Series(sem, index=idx, name=UnivariateSimulationResult.COL_SEM) self.feature_name = feature_name self.output_name = output_name self.output_unit = output_unit self.baseline = baseline self.confidence_level = confidence_level - def _ci_width(self) -> np.ndarray: - # get the width of the confidence interval - return -stats.norm.ppf((1.0 - self.confidence_level) / 2.0) * self.sem.values - - @property - def lower_bound(self) -> pd.Series: - """ - Calculate the lower CI bounds of the distribution of simulation outcomes, - for every partition. - - :return: a series of lower CI bounds, indexed by the central values of the - partitions for which the simulation was run - """ - - return (self.mean - self._ci_width()).rename( - UnivariateSimulationResult.COL_LOWER_BOUND - ) - - @property - def upper_bound(self) -> pd.Series: - """ - Calculate the lower CI bounds of the distribution of simulation outcomes, - for every partition. - - :return: a series of upper CI bounds, indexed by the central values of the - partitions for which the simulation was run - """ - return (self.mean + self._ci_width()).rename( - UnivariateSimulationResult.COL_UPPER_BOUND + # convert mean and sem to numpy arrays + mean_arr = np.array(mean) + sem_arr = np.array(sem) + + # get the width of the confidence interval (this is a negative number) + ci_width = stats.norm.ppf((1.0 - self.confidence_level) / 2.0) * sem_arr + + self.data = pd.DataFrame( + data={ + UnivariateSimulationResult.COL_MEAN: mean_arr, + UnivariateSimulationResult.COL_SEM: sem_arr, + UnivariateSimulationResult.COL_LOWER_BOUND: mean_arr + ci_width, + UnivariateSimulationResult.COL_UPPER_BOUND: mean_arr - ci_width, + }, + index=pd.Index( + partitioner.partitions_, name=UnivariateSimulationResult.IDX_PARTITION + ), ) @@ -579,7 +559,14 @@ def simulate_feature( ) # offset the mean values to get uplift instead of absolute outputs - result.mean -= self.expected_output() + result.data.loc[ + :, + [ + UnivariateSimulationResult.COL_MEAN, + UnivariateSimulationResult.COL_LOWER_BOUND, + UnivariateSimulationResult.COL_UPPER_BOUND, + ], + ] -= self.expected_output() return result diff --git a/src/facet/simulation/viz/_draw.py b/src/facet/simulation/viz/_draw.py index 4be0351f..cc905716 100644 --- a/src/facet/simulation/viz/_draw.py +++ b/src/facet/simulation/viz/_draw.py @@ -2,7 +2,9 @@ Visualizations of simulation results. 
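+
+A minimal sketch, assuming a univariate simulation ``result`` (an instance of
+:class:`.UnivariateSimulationResult`):
+
+.. code-block:: python
+
+    SimulationDrawer(style="text").draw(data=result)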
""" -from typing import Any, Iterable, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import Iterable, Optional, Type, TypeVar, Union + +import pandas as pd from pytools.api import AllTracker, inheritdoc from pytools.viz import Drawer @@ -80,50 +82,53 @@ def get_style_classes(cls) -> Iterable[Type[SimulationStyle]]: SimulationReportStyle, ] - def _draw(self, data: UnivariateSimulationResult) -> None: + def _draw(self, result: UnivariateSimulationResult) -> None: # If the partitioning of the simulation is categorical, sort partitions in # ascending order of the median output - simulation_result: Tuple[ - Sequence[float], - Sequence[float], - Sequence[float], - Sequence[Any], - Sequence[int], - ] = ( - data.mean.to_list(), - data.lower_bound.to_list(), - data.upper_bound.to_list(), - data.partitioner.partitions_, - data.partitioner.frequencies_, + simulation_result: pd.DataFrame = result.data.assign( + frequencies=result.partitioner.frequencies_ ) - if data.partitioner.is_categorical: - # for categorical features, sort the categories by the median uplift - simulation_result = tuple( - *zip(*sorted(zip(*simulation_result), key=lambda x: x[0])) + if result.partitioner.is_categorical: + # for categorical features, sort the categories by mean predictions + simulation_result = simulation_result.sort_values( + by=UnivariateSimulationResult.COL_MEAN ) + partitions = simulation_result.index.values + frequencies = simulation_result.frequencies.values + # draw the graph with the uplift curves self.style.draw_uplift( - feature_name=data.feature_name, - output_name=data.output_name, - output_unit=data.output_unit, - outputs_median=simulation_result[0], - outputs_lower_bound=simulation_result[1], - outputs_upper_bound=simulation_result[2], - baseline=data.baseline, - confidence_level=data.confidence_level, - partitions=simulation_result[3], - frequencies=simulation_result[4], - is_categorical_feature=data.partitioner.is_categorical, + feature_name=result.feature_name, + output_name=result.output_name, + output_unit=result.output_unit, + outputs_median=( + simulation_result.loc[:, UnivariateSimulationResult.COL_MEAN].values + ), + outputs_lower_bound=( + simulation_result.loc[ + :, UnivariateSimulationResult.COL_LOWER_BOUND + ].values + ), + outputs_upper_bound=( + simulation_result.loc[ + :, UnivariateSimulationResult.COL_UPPER_BOUND + ].values + ), + baseline=result.baseline, + confidence_level=result.confidence_level, + partitions=partitions, + frequencies=frequencies, + is_categorical_feature=result.partitioner.is_categorical, ) if self.histogram: # draw the histogram of the simulation values self.style.draw_histogram( - partitions=simulation_result[3], - frequencies=simulation_result[4], - is_categorical_feature=data.partitioner.is_categorical, + partitions=partitions, + frequencies=frequencies, + is_categorical_feature=result.partitioner.is_categorical, ) diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py index c32acd26..af4dc406 100644 --- a/test/test/facet/test_simulation.py +++ b/test/test/facet/test_simulation.py @@ -86,7 +86,7 @@ def test_univariate_target_simulation( ) assert_series_equal( - simulation_result.lower_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND], pd.Series( [24.98646, 24.98646, 21.15398, 20.23877, 20.23877, 20.23877], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -95,7 +95,7 @@ def test_univariate_target_simulation( ) assert_series_equal( - simulation_result.mean, + 
simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN], pd.Series( [25.4571, 25.4571, 21.67744, 20.81063, 20.81063, 20.81063], name=UnivariateSimulationResult.COL_MEAN, @@ -104,7 +104,7 @@ def test_univariate_target_simulation( ) assert_series_equal( - simulation_result.upper_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND], pd.Series( [25.92774, 25.92774, 22.2009, 21.38249, 21.38249, 21.38249], name=UnivariateSimulationResult.COL_UPPER_BOUND, @@ -147,7 +147,7 @@ def test_univariate_target_subsample_simulation_80( ) assert_series_equal( - simulation_result.lower_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND], pd.Series( [25.05676, 25.05676, 25.05676, 22.96243, 21.43395] + [21.21544, 20.76824, 20.49282, 20.49282], @@ -157,7 +157,7 @@ def test_univariate_target_subsample_simulation_80( ) assert_series_equal( - simulation_result.mean, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN], pd.Series( [25.642227, 25.642227, 25.642227, 23.598706, 22.067057] + [21.864828, 21.451056, 21.195954, 21.195954], @@ -167,7 +167,7 @@ def test_univariate_target_subsample_simulation_80( ) assert_series_equal( - simulation_result.upper_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND], pd.Series( [26.22769, 26.22769, 26.22769, 24.23498, 22.70016] + [22.51422, 22.13387, 21.89909, 21.89909], @@ -211,7 +211,9 @@ def test_univariate_uplift_subsample_simulation_95( ) assert_series_equal( - simulation_result.lower_bound.round(6), + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND].round( + 6 + ), pd.Series( [1.800835, 1.800835, 1.800835, -0.320393, -1.847194] + [-2.074327, -2.539217, -2.825394, -2.825394], @@ -221,7 +223,7 @@ def test_univariate_uplift_subsample_simulation_95( ) assert_series_equal( - simulation_result.mean.round(6), + simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN].round(6), pd.Series( [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + [-1.081172, -1.494944, -1.750046, -1.750046], @@ -231,7 +233,9 @@ def test_univariate_uplift_subsample_simulation_95( ) assert_series_equal( - simulation_result.upper_bound.round(6), + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND].round( + 6 + ), pd.Series( [3.59162, 3.59162, 3.59162, 1.625805, 0.089307] + [-0.088017, -0.450671, -0.674698, -0.674698], @@ -271,7 +275,7 @@ def test_univariate_uplift_simulation( ) assert_series_equal( - simulation_result.lower_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND], pd.Series( [2.677461, 2.677461, -1.155017, -2.070234, -2.070234, -2.070234], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -280,7 +284,7 @@ def test_univariate_uplift_simulation( ) assert_series_equal( - simulation_result.mean, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN], pd.Series( [3.148100, 3.148100, -0.631560, -1.498371, -1.498371, -1.498371], name=UnivariateSimulationResult.COL_MEAN, @@ -289,7 +293,7 @@ def test_univariate_uplift_simulation( ) assert_series_equal( - simulation_result.upper_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND], pd.Series( [3.618739, 3.618739, -0.108103, -0.926508, -0.926508, -0.926508], name=UnivariateSimulationResult.COL_UPPER_BOUND, @@ -331,7 +335,7 @@ def test_univariate_uplift_subsample_simulation( ) assert_series_equal( - simulation_result.lower_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND], 
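        # note: the former ``lower_bound``/``mean``/``upper_bound`` properties
        # are now read as columns of the result's ``data`` frame, per the
        # UnivariateSimulationResult change at the top of this patch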
pd.Series( [2.110762, 2.110762, 2.110762, 0.0164306, -1.512048] + [-1.730561, -2.177757, -2.453179, -2.453179], @@ -341,7 +345,7 @@ def test_univariate_uplift_subsample_simulation( ) assert_series_equal( - simulation_result.mean, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN], pd.Series( [2.696227, 2.696227, 2.696227, 0.652706, -0.878943] + [-1.081172, -1.494944, -1.750046, -1.750046], @@ -351,7 +355,7 @@ def test_univariate_uplift_subsample_simulation( ) assert_series_equal( - simulation_result.upper_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND], pd.Series( [3.281693, 3.281693, 3.281693, 1.288981, -0.245838] + [-0.431783, -0.81213, -1.046914, -1.046914], @@ -396,7 +400,7 @@ def test_univariate_probability_simulation( assert simulation_result.baseline == approx(0.5) assert_series_equal( - simulation_result.lower_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_LOWER_BOUND], pd.Series( [0.415337, 0.390766, 0.401039, 0.420727, 0.425914, 0.452885, 0.452885], name=UnivariateSimulationResult.COL_LOWER_BOUND, @@ -405,7 +409,7 @@ def test_univariate_probability_simulation( ) assert_series_equal( - simulation_result.mean, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_MEAN], pd.Series( [0.495814, 0.475288, 0.48689, 0.507294, 0.510055, 0.533888, 0.533888], name=UnivariateSimulationResult.COL_MEAN, @@ -414,7 +418,7 @@ def test_univariate_probability_simulation( ) assert_series_equal( - simulation_result.upper_bound, + simulation_result.data.loc[:, UnivariateSimulationResult.COL_UPPER_BOUND], pd.Series( [0.576292, 0.559809, 0.57274, 0.593862, 0.594196, 0.614892, 0.614892], name=UnivariateSimulationResult.COL_UPPER_BOUND, From c34dce846e9ce4ce4f308e560d0bdc22f1461151 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 20 Jan 2022 10:14:39 +0100 Subject: [PATCH 048/106] API: refer to simulation results as mean simulated outputs, not median --- src/facet/simulation/viz/_draw.py | 4 ++-- src/facet/simulation/viz/_style.py | 27 ++++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/facet/simulation/viz/_draw.py b/src/facet/simulation/viz/_draw.py index cc905716..67cf5130 100644 --- a/src/facet/simulation/viz/_draw.py +++ b/src/facet/simulation/viz/_draw.py @@ -84,7 +84,7 @@ def get_style_classes(cls) -> Iterable[Type[SimulationStyle]]: def _draw(self, result: UnivariateSimulationResult) -> None: # If the partitioning of the simulation is categorical, sort partitions in - # ascending order of the median output + # ascending order of the mean output simulation_result: pd.DataFrame = result.data.assign( frequencies=result.partitioner.frequencies_ ) @@ -103,7 +103,7 @@ def _draw(self, result: UnivariateSimulationResult) -> None: feature_name=result.feature_name, output_name=result.output_name, output_unit=result.output_unit, - outputs_median=( + outputs_mean=( simulation_result.loc[:, UnivariateSimulationResult.COL_MEAN].values ), outputs_lower_bound=( diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py index 048a3617..2a42795c 100644 --- a/src/facet/simulation/viz/_style.py +++ b/src/facet/simulation/viz/_style.py @@ -49,7 +49,7 @@ def draw_uplift( feature_name: str, output_name: str, output_unit: str, - outputs_median: Sequence[float], + outputs_mean: Sequence[float], outputs_lower_bound: Sequence[float], outputs_upper_bound: Sequence[float], baseline: float, @@ -59,12 +59,13 @@ def draw_uplift( is_categorical_feature: bool, ) -> None: """ 
- Draw the graph with the uplift curves: median, low and high percentiles. + Draw the simulation results as the mean simulated outputs with their + confidence intervals. :param feature_name: name of the simulated feature :param output_name: name of the target for which output values were simulated :param output_unit: the unit of the output axis - :param outputs_median: the medians of the simulated outputs + :param outputs_mean: the mean simulated outputs :param outputs_lower_bound: the lower CI bounds of the simulated outputs :param outputs_upper_bound: the upper CI bounds of the simulated outputs :param baseline: the baseline of the simulation @@ -95,12 +96,12 @@ def draw_histogram( @staticmethod def _legend(confidence_level: float) -> Tuple[str, ...]: - # generate a triple with legend names for the min percentile, median, and max + # generate a triple with legend names for the min percentile, mean, and max # percentile tail_percentile = (100.0 - confidence_level * 100.0) / 2 return ( f"{tail_percentile}th percentile", - "Median", + "Mean", f"{100.0 - tail_percentile}th percentile", "Baseline", ) @@ -111,7 +112,7 @@ class SimulationMatplotStyle(MatplotStyle, SimulationStyle): """ `matplotlib` style for simulation chart. - Along the range of simulated feature values on the x axis, plots the median and + Along the range of simulated feature values on the x axis, plots the mean and confidence intervals of the simulated target value. A bar chart below the plot shows a histogram of actually observed values near the @@ -127,7 +128,7 @@ def draw_uplift( feature_name: str, output_name: str, output_unit: str, - outputs_median: Sequence[float], + outputs_mean: Sequence[float], outputs_lower_bound: Sequence[float], outputs_upper_bound: Sequence[float], baseline: float, @@ -138,7 +139,7 @@ def draw_uplift( ) -> None: """[see superclass]""" - # draw the mean predicted uplift, showing median and confidence ranges for + # draw the mean predicted uplift, showing mean and confidence ranges for # each prediction if is_categorical_feature: x = range(len(partitions)) @@ -149,9 +150,9 @@ def draw_uplift( ax = self.ax colors = self.colors - # plot the confidence bounds and the median + # plot the confidence bounds and the mean (line_min,) = ax.plot(x, outputs_lower_bound, color=colors.accent_3) - (line_median,) = ax.plot(x, outputs_median, color=colors.accent_2) + (line_mean,) = ax.plot(x, outputs_mean, color=colors.accent_2) (line_max,) = ax.plot(x, outputs_upper_bound, color=colors.accent_3) # add a horizontal line at the baseline @@ -159,7 +160,7 @@ def draw_uplift( # add a legend labels = self._legend(confidence_level=confidence_level) - handles = (line_max, line_median, line_min, line_base) + handles = (line_max, line_mean, line_min, line_base) ax.legend(handles, labels) # label the y axis @@ -317,7 +318,7 @@ def draw_uplift( feature_name: str, output_name: str, output_unit: str, - outputs_median: Sequence[float], + outputs_mean: Sequence[float], outputs_lower_bound: Sequence[float], outputs_upper_bound: Sequence[float], baseline: float, @@ -344,7 +345,7 @@ def draw_uplift( zip( partitions, outputs_lower_bound, - outputs_median, + outputs_mean, outputs_upper_bound, ) ), From 641d30aab2686c635d9e90f96cbc9e983ad69cb1 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 20 Jan 2022 12:12:42 +0100 Subject: [PATCH 049/106] DEV: get shap module from conda instead of pypi to avoid numpy conflict --- environment.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment.yml 
b/environment.yml index 1fb510f8..b92491fd 100644 --- a/environment.yml +++ b/environment.yml @@ -14,6 +14,7 @@ dependencies: - python ~= 3.8 - scikit-learn ~= 0.24.2 - scipy ~= 1.5 + - shap >=0.34,<0.40 - sklearndf >= 2dev0, < 3a # build/test - black = 20.8b1 @@ -44,5 +45,3 @@ dependencies: - tableone ~= 0.7 # pip - pip >= 20 - - pip: - - shap >=0.34,<0.40 From e07d3716717126577998f348985e26dafaa52c0f Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 20 Jan 2022 15:24:22 +0100 Subject: [PATCH 050/106] FIX: Set return signature of ShapProjector._calculate to None --- src/facet/inspection/_shap_projection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/inspection/_shap_projection.py b/src/facet/inspection/_shap_projection.py index b75c6eec..50e6b757 100644 --- a/src/facet/inspection/_shap_projection.py +++ b/src/facet/inspection/_shap_projection.py @@ -88,7 +88,7 @@ def _get_context(self, shap_calculator: ShapCalculator) -> ShapContext: pass @abstractmethod - def _calculate(self, context: ShapContext) -> AffinityMatrix: + def _calculate(self, context: ShapContext) -> None: pass @staticmethod From 5bebc103c74749f660a917ca16a6ae8df4e9792b Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 20 Jan 2022 23:36:47 +0100 Subject: [PATCH 051/106] FIX: replace iris_classifier_model_binary with iris_classifier_binary --- test/test/facet/test_simulation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test/facet/test_simulation.py b/test/test/facet/test_simulation.py index af4dc406..b9674fba 100644 --- a/test/test/facet/test_simulation.py +++ b/test/test/facet/test_simulation.py @@ -372,7 +372,7 @@ def test_univariate_uplift_subsample_simulation( def test_univariate_probability_simulation( - iris_classifier_model_binary: ClassifierPipelineDF[RandomForestClassifierDF], + iris_classifier_binary: ClassifierPipelineDF[RandomForestClassifierDF], iris_sample_binary: Sample, n_jobs: int, ) -> None: @@ -382,7 +382,7 @@ def test_univariate_probability_simulation( print(iris_sample_binary.feature_names) proba_simulator = UnivariateProbabilitySimulator( - model=iris_classifier_model_binary, + model=iris_classifier_binary, sample=iris_sample_binary, confidence_level=0.95, n_jobs=n_jobs, From fd9408f1970983acbc2f3627122dfd7d17a33a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Fri, 21 Jan 2022 10:39:15 +0100 Subject: [PATCH 052/106] API: remove previous LearnerRanker --- src/facet/selection/_selection.py | 327 +---------------------------- test/test/conftest.py | 160 +++++--------- test/test/facet/test_crossfit.py | 94 ++++----- test/test/facet/test_inspection.py | 56 ++--- test/test/facet/test_selection.py | 143 ++++++------- 5 files changed, 193 insertions(+), 587 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 989bd265..6f591bd2 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -4,7 +4,6 @@ import inspect import itertools import logging -import math import operator import re from functools import reduce @@ -25,7 +24,6 @@ Type, TypeVar, Union, - cast, ) import numpy as np @@ -33,22 +31,21 @@ from numpy.random.mtrand import RandomState from sklearn.model_selection import BaseCrossValidator, GridSearchCV -from pytools.api import AllTracker, deprecated, inheritdoc, to_tuple +from pytools.api import AllTracker, deprecated, inheritdoc from pytools.fit import FittableMixin -from pytools.parallelization import JobRunner, 
ParallelizableMixin +from pytools.parallelization import ParallelizableMixin from sklearndf.pipeline import ( ClassifierPipelineDF, LearnerPipelineDF, RegressorPipelineDF, ) -from facet.crossfit import LearnerCrossfit from facet.data import Sample from facet.selection.base import BaseParameterSpace log = logging.getLogger(__name__) -__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker", "LearnerRanker2"] +__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker2"] # # Type constants @@ -238,7 +235,9 @@ def fit( :param fit_params: any fit parameters to pass on to the learner's fit method :return: ``self`` """ - self: LearnerRanker2[T_LearnerPipelineDF] # support type hinting in PyCharm + self: LearnerRanker2[ + T_LearnerPipelineDF, T_SearchCV + ] # support type hinting in PyCharm self._reset_fit() @@ -519,320 +518,6 @@ def __init__( self.ranking_score = ranking_score -@inheritdoc(match="[see superclass]") -class LearnerRanker( - ParallelizableMixin, FittableMixin[Sample], Generic[T_LearnerPipelineDF] -): - """ - Score and rank different parametrizations of one or more learners, - using cross-validation. - - The learner ranker can run a simultaneous grid search across multiple alternative - learner pipelines, supporting the ability to simultaneously select a learner - algorithm and optimize hyper-parameters. - """ - - @deprecated(message=f"use class {LearnerRanker2.__name__} instead") - def __init__( - self, - grids: Union[ - LearnerGrid[T_LearnerPipelineDF], Iterable[LearnerGrid[T_LearnerPipelineDF]] - ], - cv: Optional[BaseCrossValidator], - scoring: Union[str, Callable[[float, float], float], None] = None, - ranking_scorer: Callable[[np.ndarray], float] = None, - random_state: Union[int, RandomState, None] = None, - n_jobs: Optional[int] = None, - shared_memory: Optional[bool] = None, - pre_dispatch: Optional[Union[str, int]] = None, - verbose: Optional[int] = None, - ) -> None: - """ - :param grids: learner grids to be ranked - (either a single grid, or an iterable of multiple grids) - :param cv: a cross validator (e.g., - :class:`.BootstrapCV`) - :param scoring: a scoring function (by name, or as a callable) for evaluating - learners (optional; use learner's default scorer if not specified here). - If passing a callable, the ``"score"`` will be used as the name of the - scoring function unless the callable defines a ``__name__`` attribute - :param ranking_scorer: a function to calculate a scalar score for every - crossfit and returning a float. - The resulting score is used to rank all crossfits (highest score is best). - Defaults to :meth:`.default_ranking_scorer`, calculating - `mean(scores) - 2 * std(scores, ddof=1)` - :param random_state: optional random seed or random state for shuffling the - feature column order - """ - super().__init__( - n_jobs=n_jobs, - shared_memory=shared_memory, - pre_dispatch=pre_dispatch, - verbose=verbose, - ) - - if scoring is not None and not (isinstance(scoring, str) or callable(scoring)): - raise TypeError( - "only a single scoring function is currently supported, " - f"but a {type(scoring).__name__} was given as arg scoring" - ) - - grids_tuple: Tuple[LearnerGrid, ...] 
= to_tuple( - grids, element_type=LearnerGrid, arg_name="grids" - ) - if len(grids_tuple) == 0: - raise ValueError("arg grids must specify at least one LearnerGrid") - learner_type = _learner_type(grids_tuple[0].pipeline) - if not all(isinstance(grid.pipeline, learner_type) for grid in grids_tuple[1:]): - raise ValueError("arg grids mixes regressor and classifier pipelines") - - self.grids = grids_tuple - self.cv = cv - self.scoring = scoring - self.ranking_scorer = ( - LearnerRanker.default_ranking_scorer - if ranking_scorer is None - else ranking_scorer - ) - self.random_state = random_state - - # initialise state - self._ranking: Optional[List[LearnerEvaluation]] = None - self._best_model: Optional[T_LearnerPipelineDF] = None - - # add parameter documentation of ParallelizableMixin - __init__.__doc__ += ParallelizableMixin.__init__.__doc__ - - @property - def is_fitted(self) -> bool: - """[see superclass]""" - return self._ranking is not None - - @property - def scoring_name(self) -> str: - """ - The name of the scoring function used to rank the learners. - """ - scoring = self.scoring - if isinstance(scoring, str): - return scoring - elif callable(scoring): - try: - return scoring.__name__ - except AttributeError: - return "score" - else: - learner_type = _learner_type(self.grids[0].pipeline) - if learner_type is RegressorPipelineDF: - return "r2_score" - elif learner_type is ClassifierPipelineDF: - return "accuracy_score" - else: - # default case - we should not end up here but adding this for forward - # compatibility - return "score" - - @property - def ranking_(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]: - """ - A list of :class:`.LearnerEvaluation` for all learners evaluated - by this ranker, in descending order of the ranking score. - """ - self._ensure_fitted() - return self._ranking - - @property - def best_model_(self) -> T_LearnerPipelineDF: - """ - The pipeline which obtained the best ranking score, fitted on the entire sample. - """ - self._ensure_fitted() - return self._best_model - - @property - def best_model_crossfit_(self) -> LearnerCrossfit[T_LearnerPipelineDF]: - """ - The crossfit which obtained the best ranking score. - """ - self._ensure_fitted() - return self._best_crossfit - - @staticmethod - def default_ranking_scorer(scores: np.ndarray) -> float: - """ - The default function used to rank pipelines. - - Calculates `mean(scores) - 2 * std(scores, ddof=1)`, i.e., ranks pipelines by a - (pessimistic) lower bound of the expected score. - - :param scores: the scores for all crossfits - :return: scalar score for ranking the pipeline - """ - return scores.mean() - 2 * scores.std(ddof=1) - - def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: - """ - Rank the candidate learners and their hyper-parameter combinations using - crossfits from the given sample. - - Other than the scikit-learn implementation of grid search, arbitrary parameters - can be passed on to the learner pipeline(s) to be fitted. 
- - :param sample: the sample from which to fit the crossfits - :param fit_params: any fit parameters to pass on to the learner's fit method - :return: ``self`` - """ - self: LearnerRanker[T_LearnerPipelineDF] # support type hinting in PyCharm - - ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = self._rank_learners( - sample=sample, **fit_params - ) - ranking.sort(key=lambda le: le.ranking_score, reverse=True) - - self._ranking = ranking - self._best_model = self._ranking[0].pipeline.fit( - X=sample.features, y=sample.target - ) - - return self - - def summary_report(self) -> pd.DataFrame: - """ - Create a summary table of the scores achieved by all learners in the grid - search, sorted by ranking score in descending order. - - :return: the summary report of the grid search as a data frame - """ - - self._ensure_fitted() - - # define the columns of the resulting data frame - - col_ranking_score = "ranking_score" - scoring_name = self.scoring_name - col_scores_mean = f"{scoring_name}__mean" - col_scores_std = f"{scoring_name}__std" - col_learner_type = f"{self.grids[0].pipeline.final_estimator_name}__type" - - parameters: List[str] = [] - for grid in self.grids: - # noinspection PyTypeChecker - parameters.extend(grid.parameters.keys() - parameters) - - columns = [ - col_ranking_score, - col_scores_mean, - col_scores_std, - col_learner_type, - *parameters, - ] - - # build the report - - report = pd.DataFrame.from_records( - [ - { - col_ranking_score: evaluation.ranking_score, - col_scores_mean: evaluation.scores.mean(), - col_scores_std: evaluation.scores.std(ddof=1), - col_learner_type: type( - evaluation.pipeline.final_estimator - ).__name__, - **evaluation.parameters, - } - for evaluation in ( - sorted( - self._ranking, - key=lambda evaluation: evaluation.ranking_score, - reverse=True, - ) - ) - ], - columns=columns, - ).rename_axis(index="rank") - - # split column headers containing one or more "__", - # resulting in a column MultiIndex - - report.columns = report.columns.str.split("__", expand=True).map( - lambda column: tuple(level if pd.notna(level) else "" for level in column) - ) - - return report - - def _rank_learners( - self, sample: Sample, **fit_params - ) -> List[LearnerEvaluation[T_LearnerPipelineDF]]: - ranking_scorer = self.ranking_scorer - - pipelines: Iterable[T_LearnerPipelineDF] - pipelines_parameters: Iterable[Dict[str, Any]] - pipelines, pipelines_parameters = zip( - *( - ( - cast(T_LearnerPipelineDF, grid.pipeline.clone()).set_params( - **parameters - ), - parameters, - ) - for grid in self.grids - for parameters in grid - ) - ) - - ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = [] - best_score: float = -math.inf - best_crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None - - scoring_name = self.scoring_name - - crossfits = [ - LearnerCrossfit( - pipeline=pipeline, - cv=self.cv, - random_state=self.random_state, - n_jobs=self.n_jobs, - shared_memory=self.shared_memory, - pre_dispatch=self.pre_dispatch, - verbose=self.verbose, - ) - for pipeline in pipelines - ] - - queues = ( - crossfit.fit_score_queue(sample=sample, scoring=self.scoring, **fit_params) - for crossfit in crossfits - ) - - pipeline_scorings: List[np.ndarray] = list( - JobRunner.from_parallelizable(self).run_queues(queues) - ) - - for crossfit, pipeline_parameters, pipeline_scoring in zip( - crossfits, pipelines_parameters, pipeline_scorings - ): - - ranking_score = ranking_scorer(pipeline_scoring) - crossfit_pipeline = crossfit.pipeline - assert crossfit_pipeline.is_fitted - 
ranking.append( - LearnerEvaluation( - pipeline=crossfit_pipeline, - parameters=pipeline_parameters, - scoring_name=scoring_name, - scores=pipeline_scoring, - ranking_score=ranking_score, - ) - ) - - if ranking_score > best_score: - best_score = ranking_score - best_crossfit = crossfit - - self._best_crossfit = best_crossfit - return ranking - - def _learner_type(pipeline: T_LearnerPipelineDF) -> Type[T_LearnerPipelineDF]: # determine whether a learner pipeline fits a regressor or a classifier for learner_type in [RegressorPipelineDF, ClassifierPipelineDF]: diff --git a/test/test/conftest.py b/test/test/conftest.py index 07233307..c0b1f0eb 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -5,7 +5,7 @@ import pandas as pd import pytest from sklearn import datasets -from sklearn.model_selection import BaseCrossValidator, KFold +from sklearn.model_selection import BaseCrossValidator, GridSearchCV, KFold from sklearn.utils import Bunch from sklearndf import TransformerDF @@ -29,13 +29,7 @@ import facet from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory -from facet.selection import ( - LearnerEvaluation, - LearnerGrid, - LearnerRanker, - MultiRegressorParameterSpace, - ParameterSpace, -) +from facet.selection import LearnerRanker2, MultiRegressorParameterSpace, ParameterSpace from facet.validation import BootstrapCV, StratifiedBootstrapCV logging.basicConfig(level=logging.DEBUG) @@ -91,65 +85,6 @@ def cv_stratified_bootstrap() -> BaseCrossValidator: return StratifiedBootstrapCV(n_splits=N_BOOTSTRAPS, random_state=42) -@pytest.fixture -def regressor_grids(simple_preprocessor: TransformerDF) -> List[LearnerGrid]: - random_state = {"random_state": [42]} - - return [ - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=LGBMRegressorDF() - ), - learner_parameters={ - "max_depth": [5, 10], - "min_split_gain": [0.1, 0.2], - "num_leaves": [50, 100, 200], - **random_state, - }, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=AdaBoostRegressorDF() - ), - learner_parameters={"n_estimators": [50, 80], **random_state}, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=RandomForestRegressorDF() - ), - learner_parameters={"n_estimators": [50, 80], **random_state}, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=DecisionTreeRegressorDF() - ), - learner_parameters={ - "max_depth": [0.5, 1.0], - "max_features": [0.5, 1.0], - **random_state, - }, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=ExtraTreeRegressorDF() - ), - learner_parameters={"max_depth": [5, 10, 12], **random_state}, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=SVRDF() - ), - learner_parameters={"gamma": [0.5, 1], "C": [50, 100]}, - ), - LearnerGrid( - pipeline=RegressorPipelineDF( - preprocessing=simple_preprocessor, regressor=LinearRegressionDF() - ), - learner_parameters={"normalize": [False, True]}, - ), - ] - - @pytest.fixture def regressor_parameters( simple_preprocessor: TransformerDF, @@ -226,28 +161,32 @@ def regressor_parameters( @pytest.fixture def regressor_ranker( cv_kfold: KFold, - regressor_grids: List[LearnerGrid[RegressorPipelineDF]], + regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int, -) -> LearnerRanker[RegressorPipelineDF]: - return 
LearnerRanker( - grids=regressor_grids, cv=cv_kfold, scoring="r2", n_jobs=n_jobs +) -> LearnerRanker2[RegressorPipelineDF, GridSearchCV]: + return LearnerRanker2( + searcher_factory=GridSearchCV, + parameter_space=regressor_parameters, + cv=cv_kfold, + scoring="r2", + n_jobs=n_jobs, ).fit(sample=sample) @pytest.fixture def best_lgbm_model( - regressor_ranker: LearnerRanker[RegressorPipelineDF], + regressor_ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV], + sample: Sample, ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test # performance - best_lgbm_evaluation: LearnerEvaluation[RegressorPipelineDF] = [ - evaluation - for evaluation in regressor_ranker.ranking_ - if isinstance(evaluation.pipeline.regressor, LGBMRegressorDF) + candidates = regressor_ranker.summary_report()["param_candidate"].iloc[:, 0] + best_lgbm_model = candidates[ + candidates.apply(lambda x: isinstance(x.regressor, LGBMRegressorDF)) ][0] - return best_lgbm_evaluation.pipeline + return best_lgbm_model.fit(X=sample.features, y=sample.target) @pytest.fixture @@ -374,42 +313,35 @@ def iris_sample_binary_dual_target( def check_ranking( - ranking: List[LearnerEvaluation], + ranking: pd.DataFrame, + is_classifier: bool, expected_scores: Sequence[float], - expected_learners: Optional[Sequence[type]], expected_parameters: Optional[Mapping[int, Mapping[str, Any]]], ) -> None: """ Test helper to check rankings produced by learner rankers - :param ranking: a list of LearnerEvaluations + :param ranking: summary data frame + :param is_classifier: flag if ranking was performed on classifiers, or regressors :param expected_scores: expected ranking scores, rounded to 3 decimal places - :param expected_learners: expected learner classes :param expected_parameters: expected learner parameters :return: None """ - if expected_learners is None: - expected_learners = [None] * len(ranking) + SCORE_COLUMN = "mean_test_score" - for rank, (learner_eval, score_expected, learner_expected) in enumerate( - zip(ranking, expected_scores, expected_learners) - ): - score_actual = round(learner_eval.ranking_score, 3) + for rank, score_expected in enumerate(expected_scores): + score_actual = round(ranking[SCORE_COLUMN].iloc[rank], 3) assert score_actual == pytest.approx(score_expected, abs=0.1), ( f"unexpected score for learner at rank #{rank + 1}: " f"got {score_actual} but expected {score_expected}" ) - if learner_expected is not None: - learner_actual = learner_eval.pipeline.final_estimator - assert type(learner_actual) == learner_expected, ( - f"unexpected class for learner at rank #{rank}: " - f"got {type(learner_actual)} but expected {learner_expected}" - ) + + param_column = "param_classifier" if is_classifier else "param_regressor" if expected_parameters is not None: for rank, parameters_expected in expected_parameters.items(): - parameters_actual = ranking[rank].parameters + parameters_actual = ranking[param_column].iloc[rank].to_dict() assert parameters_actual == parameters_expected, ( f"unexpected parameters for learner at rank #{rank}: " f"got {parameters_actual} but expected {parameters_expected}" @@ -421,7 +353,7 @@ def iris_classifier_ranker_binary( iris_sample_binary: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: +) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) 
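# Aside: taken together, the fixtures above exercise the new searcher-based
# selection API roughly as follows -- a self-contained sketch using the iris
# data as in the tests (report columns as checked in check_ranking below):

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import GridSearchCV

from sklearndf.classification import RandomForestClassifierDF
from sklearndf.pipeline import ClassifierPipelineDF

from facet.data import Sample
from facet.selection import LearnerRanker2, ParameterSpace

iris = datasets.load_iris()
sample = Sample(
    observations=pd.DataFrame(
        data=iris.data, columns=iris.feature_names
    ).assign(target=iris.target),
    target_name="target",
)

space = ParameterSpace(
    ClassifierPipelineDF(
        classifier=RandomForestClassifierDF(random_state=42), preprocessing=None
    )
)
space.classifier.n_estimators = [10, 50]
space.classifier.min_samples_leaf = [4, 8]

ranker = LearnerRanker2(
    searcher_factory=GridSearchCV,
    parameter_space=space,
    cv=5,
    scoring="f1_macro",
).fit(sample=sample)

report = ranker.summary_report()                # one row per candidate, best first
best_score = report["mean_test_score"].iloc[0]  # ranking score of the winner
best_model = ranker.best_estimator_             # best pipeline found by the searcher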
@@ -432,7 +364,7 @@ def iris_classifier_ranker_multi_class( iris_sample_multi_class: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: +) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_multi_class, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -441,7 +373,7 @@ def iris_classifier_ranker_multi_class( @pytest.fixture def iris_classifier_ranker_dual_target( iris_sample_binary_dual_target: Sample, cv_bootstrap: BootstrapCV, n_jobs: int -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: +) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary_dual_target, cv=cv_bootstrap, n_jobs=n_jobs ) @@ -449,16 +381,18 @@ def iris_classifier_ranker_dual_target( @pytest.fixture def iris_classifier_binary( - iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF], + iris_classifier_ranker_binary: LearnerRanker2[ClassifierPipelineDF, GridSearchCV], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: - return iris_classifier_ranker_binary.best_model_ + return iris_classifier_ranker_binary.best_estimator_ @pytest.fixture def iris_classifier_multi_class( - iris_classifier_ranker_multi_class: LearnerRanker[ClassifierPipelineDF], + iris_classifier_ranker_multi_class: LearnerRanker2[ + ClassifierPipelineDF, GridSearchCV + ], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: - return iris_classifier_ranker_multi_class.best_model_ + return iris_classifier_ranker_multi_class.best_estimator_ @pytest.fixture @@ -479,23 +413,23 @@ def iris_inspector_multi_class( def fit_classifier_ranker( sample: Sample, cv: BaseCrossValidator, n_jobs: int -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF]]: - # define the parameter grid - grids = [ - LearnerGrid( - pipeline=ClassifierPipelineDF( - classifier=RandomForestClassifierDF(random_state=42), preprocessing=None - ), - learner_parameters={"n_estimators": [10, 50], "min_samples_leaf": [4, 8]}, +) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: + # define the parameter space + parameter_space = ParameterSpace( + ClassifierPipelineDF( + classifier=RandomForestClassifierDF(random_state=42), + preprocessing=None, ) - ] + ) + parameter_space.classifier.n_estimators = [10, 50] + parameter_space.classifier.min_samples_leaf = [4, 8] # pipeline inspector only supports binary classification, # therefore filter the sample down to only 2 target classes - return LearnerRanker( - grids=grids, + return LearnerRanker2( + searcher_factory=GridSearchCV, + parameter_space=parameter_space, cv=cv, scoring="f1_macro", - random_state=42, n_jobs=n_jobs, ).fit(sample=sample) diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index e5abf5ec..30e47127 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -1,14 +1,18 @@ import logging -import numpy as np import pytest +from sklearn.model_selection import GridSearchCV from sklearndf.classification import RandomForestClassifierDF from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import RandomForestRegressorDF -from ..conftest import check_ranking -from facet.selection import LearnerGrid, LearnerRanker +# from ..conftest import check_ranking +from facet.selection import ( + LearnerRanker2, + 
MultiClassifierParameterSpace, + ParameterSpace, +) from facet.validation import StratifiedBootstrapCV log = logging.getLogger(__name__) @@ -18,72 +22,58 @@ def test_prediction_classifier( iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int ) -> None: - expected_learner_scores = [0.889, 0.886, 0.885, 0.879] + # expected_learner_scores = [0.889, 0.886, 0.885, 0.879] # define parameters and crossfit - grids = LearnerGrid( - pipeline=ClassifierPipelineDF( - classifier=RandomForestClassifierDF(random_state=42) - ), - learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]}, + ps1 = ParameterSpace( + ClassifierPipelineDF(classifier=RandomForestClassifierDF(random_state=42)) ) + ps1.classifier.min_samples_leaf = [16, 32] + ps1.classifier.n_estimators = [50, 80] - # define an illegal grid list, mixing classification with regression - grids_illegal = [ - grids, - LearnerGrid( - pipeline=RegressorPipelineDF( - regressor=RandomForestRegressorDF(random_state=42) - ), - learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]}, - ), - ] + ps2 = ParameterSpace( + RegressorPipelineDF(regressor=RandomForestRegressorDF(random_state=42)) + ) + ps2.regressor.min_samples_leaf = [16, 32] + ps2.regressor.n_estimators = [50, 80] with pytest.raises( - ValueError, match="^arg grids mixes regressor and classifier pipelines$" + TypeError, + match="^all candidate estimators must be instances of " + "ClassifierPipelineDF, but candidate estimators include: " + "RegressorPipelineDF$", ): - LearnerRanker( - grids=grids_illegal, - cv=cv_stratified_bootstrap, - ) - - model_ranker: LearnerRanker[ - ClassifierPipelineDF[RandomForestClassifierDF] - ] = LearnerRanker( - grids=grids, + # define an illegal grid list, mixing classification with regression + MultiClassifierParameterSpace(ps1, ps2) + + model_ranker: LearnerRanker2[ + ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV + ] = LearnerRanker2( + searcher_factory=GridSearchCV, + parameter_space=ps1, cv=cv_stratified_bootstrap, scoring="f1_macro", n_jobs=n_jobs, - random_state=42, ) - model_ranker.fit(sample=iris_sample_multi_class) - with pytest.raises( - ValueError, match="do not use arg sample_weight to pass sample weights" + ValueError, + match="arg sample_weight is not supported, " "use ag sample.weight instead", ): model_ranker.fit( sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight ) - log.debug(f"\n{model_ranker.summary_report()}") - - check_ranking( - ranking=model_ranker.ranking_, - expected_scores=expected_learner_scores, - expected_learners=[RandomForestClassifierDF] * 4, - expected_parameters={ - 2: dict(classifier__min_samples_leaf=32, classifier__n_estimators=50), - 3: dict(classifier__min_samples_leaf=32, classifier__n_estimators=80), - }, - ) - - # consider: model_with_type(...) 
function for ModelRanking - crossfit = model_ranker.best_model_crossfit_ + model_ranker.fit(sample=iris_sample_multi_class) - assert crossfit.is_fitted + log.debug(f"\n{model_ranker.summary_report()}") - accuracy_scores_per_split: np.ndarray = crossfit.score(scoring="accuracy") - assert ( - (accuracy_scores_per_split > 0.9) & (accuracy_scores_per_split <= 1.0) - ).all() + # check_ranking( + # ranking=model_ranker.ranking_, + # expected_scores=expected_learner_scores, + # expected_learners=[RandomForestClassifierDF] * 4, + # expected_parameters={ + # 2: dict(classifier__min_samples_leaf=32, classifier__n_estimators=50), + # 3: dict(classifier__min_samples_leaf=32, classifier__n_estimators=80), + # }, + # ) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 7ccea522..76d92eb1 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -3,7 +3,7 @@ """ import logging import warnings -from typing import List, Optional, Sequence, TypeVar, Union +from typing import List, Optional, TypeVar, Union import numpy as np import pandas as pd @@ -11,7 +11,7 @@ from numpy.testing import assert_allclose from pandas.testing import assert_frame_equal, assert_series_equal from sklearn.datasets import make_classification -from sklearn.model_selection import KFold +from sklearn.model_selection import GridSearchCV, KFold from pytools.viz.dendrogram import DendrogramDrawer, DendrogramReportStyle from sklearndf import TransformerDF @@ -21,14 +21,14 @@ ) from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF -from ..conftest import check_ranking +# from ..conftest import check_ranking from facet.data import Sample from facet.inspection import ( KernelExplainerFactory, LearnerInspector, TreeExplainerFactory, ) -from facet.selection import LearnerGrid, LearnerRanker +from facet.selection import LearnerRanker2 # noinspection PyMissingOrEmptyDocstring @@ -38,8 +38,7 @@ def test_model_inspection( - regressor_grids: Sequence[LearnerGrid[RegressorPipelineDF]], - regressor_ranker: LearnerRanker[RegressorPipelineDF], + regressor_ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV], best_lgbm_model: RegressorPipelineDF, preprocessed_feature_names, regressor_inspector: LearnerInspector, @@ -52,14 +51,15 @@ def test_model_inspection( # define checksums for this test log.debug(f"\n{regressor_ranker.summary_report()}") - check_ranking( - ranking=regressor_ranker.ranking_, - expected_scores=( - [0.418, 0.400, 0.386, 0.385, 0.122, 0.122, -0.074, -0.074, -0.074, -0.074] - ), - expected_learners=None, - expected_parameters=None, - ) + # TODO adjust + # check_ranking( + # ranking=regressor_ranker.ranking_, + # expected_scores=( + # [0.418, 0.400, 0.386, 0.385, 0.122, 0.122, -0.074, -0.074, -0.074, -0.074] + # ), + # expected_learners=None, + # expected_parameters=None, + # ) shap_values: pd.DataFrame = regressor_inspector.shap_values() @@ -101,18 +101,20 @@ def test_model_inspection( def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: - expected_learner_scores = [0.872, 0.868, 0.866, 0.859] + # expected_learner_scores = [0.872, 0.868, 0.866, 0.859] log.debug(f"\n{iris_classifier_ranker_binary.summary_report()}") - check_ranking( - ranking=iris_classifier_ranker_binary.ranking_, - expected_scores=expected_learner_scores, - expected_learners=[RandomForestClassifierDF] * 4, - expected_parameters={ - 2: dict(classifier__min_samples_leaf=4, classifier__n_estimators=10), - 3: dict(classifier__min_samples_leaf=8, 
classifier__n_estimators=10), - }, - ) + + # TODO adjust + # check_ranking( + # ranking=iris_classifier_ranker_binary.ranking_, + # expected_scores=expected_learner_scores, + # expected_learners=[RandomForestClassifierDF] * 4, + # expected_parameters={ + # 2: dict(classifier__min_samples_leaf=4, classifier__n_estimators=10), + # 3: dict(classifier__min_samples_leaf=8, classifier__n_estimators=10), + # }, + # ) # noinspection DuplicatedCode @@ -614,13 +616,13 @@ def test_model_inspection_classifier_interaction( def test_model_inspection_classifier_interaction_dual_target( iris_sample_binary_dual_target: Sample, - iris_classifier_ranker_dual_target: LearnerRanker[ - ClassifierPipelineDF[RandomForestClassifierDF] + iris_classifier_ranker_dual_target: LearnerRanker2[ + ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV ], iris_target_name, n_jobs: int, ) -> None: - iris_classifier_dual_target = iris_classifier_ranker_dual_target.best_model_ + iris_classifier_dual_target = iris_classifier_ranker_dual_target.best_estimator_ with pytest.raises( ValueError, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 497aca68..c136e515 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -3,7 +3,6 @@ """ import logging -from typing import List import numpy as np import pandas as pd @@ -17,25 +16,20 @@ from sklearndf import TransformerDF from sklearndf.classification import SVCDF from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF -from sklearndf.regression import ( - AdaBoostRegressorDF, - LinearRegressionDF, +from sklearndf.regression import ( # AdaBoostRegressorDF,; LinearRegressionDF, RandomForestRegressorDF, ) from sklearndf.regression.extra import LGBMRegressorDF from ..conftest import check_ranking -from facet.crossfit import LearnerCrossfit from facet.data import Sample from facet.selection import ( - LearnerEvaluation, LearnerGrid, - LearnerRanker, + LearnerRanker2, MultiClassifierParameterSpace, MultiRegressorParameterSpace, ParameterSpace, ) -from facet.selection._selection import LearnerRanker2 from facet.validation import BootstrapCV log = logging.getLogger(__name__) @@ -90,84 +84,77 @@ def test_parameter_grid() -> None: def test_model_ranker( - regressor_grids: List[LearnerGrid[RegressorPipelineDF]], sample: Sample, n_jobs: int + regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int ) -> None: - expected_scores = [0.745, 0.742, 0.7, 0.689, 0.675, 0.675, 0.61, 0.61, 0.61, 0.61] - expected_learners = [ - RandomForestRegressorDF, - RandomForestRegressorDF, - AdaBoostRegressorDF, - AdaBoostRegressorDF, - LinearRegressionDF, - LinearRegressionDF, - LGBMRegressorDF, - LGBMRegressorDF, - LGBMRegressorDF, - LGBMRegressorDF, - ] - expected_parameters = { - 0: dict(regressor__n_estimators=80, regressor__random_state=42), - 1: dict(regressor__n_estimators=50, regressor__random_state=42), - 2: dict(regressor__n_estimators=50, regressor__random_state=42), - 3: dict(regressor__n_estimators=80, regressor__random_state=42), - } + # TODO adjust + # expected_scores = [0.745, 0.742, 0.7, 0.689, 0.675, 0.675, 0.61, 0.61, 0.61, 0.61] + # expected_learners = [ + # RandomForestRegressorDF, + # RandomForestRegressorDF, + # AdaBoostRegressorDF, + # AdaBoostRegressorDF, + # LinearRegressionDF, + # LinearRegressionDF, + # LGBMRegressorDF, + # LGBMRegressorDF, + # LGBMRegressorDF, + # LGBMRegressorDF, + # ] + # expected_parameters = { + # 0: dict(regressor__n_estimators=80, 
regressor__random_state=42), + # 1: dict(regressor__n_estimators=50, regressor__random_state=42), + # 2: dict(regressor__n_estimators=50, regressor__random_state=42), + # 3: dict(regressor__n_estimators=80, regressor__random_state=42), + # } # define the circular cross validator with just 5 splits (to speed up testing) cv = BootstrapCV(n_splits=5, random_state=42) - ranker: LearnerRanker[RegressorPipelineDF] = LearnerRanker( - grids=regressor_grids, cv=cv, scoring="r2", n_jobs=n_jobs + ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV] = LearnerRanker2( + searcher_factory=GridSearchCV, + parameter_space=regressor_parameters, + cv=cv, + scoring="r2", + n_jobs=n_jobs, ).fit(sample=sample) log.debug(f"\n{ranker.summary_report()}") - assert isinstance(ranker.best_model_crossfit_, LearnerCrossfit) + assert isinstance(ranker.best_estimator_.steps[0][1], RegressorPipelineDF) - ranking = ranker.ranking_ + ranking = ranker.summary_report() assert len(ranking) > 0 - assert isinstance(ranking[0], LearnerEvaluation) - assert all( - ranking_hi.ranking_score >= ranking_lo.ranking_score - for ranking_hi, ranking_lo in zip(ranking, ranking[1:]) - ) - - # check if parameters set for estimators actually match expected: - for evaluation in ranker.ranking_: - pipeline_parameters = evaluation.pipeline.get_params() - for name, value in evaluation.parameters.items(): - assert ( - name in pipeline_parameters - ), f"parameter {name} is a parameter in evaluation.pipeline" - assert ( - pipeline_parameters[name] == value - ), f"evaluation.pipeline.{name} is set to {value}" - - check_ranking( - ranking=ranker.ranking_, - expected_scores=expected_scores, - expected_learners=expected_learners, - expected_parameters=expected_parameters, - ) + # TODO adjust + # assert isinstance(ranking[0], LearnerEvaluation) + # assert all( + # ranking_hi.ranking_score >= ranking_lo.ranking_score + # for ranking_hi, ranking_lo in zip(ranking, ranking[1:]) + # ) + + # TODO adjust + # check_ranking( + # ranking=ranker.ranking_, + # expected_scores=expected_scores, + # expected_learners=expected_learners, + # expected_parameters=expected_parameters, + # ) def test_model_ranker_no_preprocessing(n_jobs) -> None: - expected_learner_scores = [0.943, 0.913, 0.913, 0.884] + expected_learner_scores = [0.961, 0.957, 0.957, 0.936] # define a yield-engine circular CV: cv = BootstrapCV(n_splits=5, random_state=42) # define parameters and pipeline - models = [ - LearnerGrid( - pipeline=ClassifierPipelineDF( - classifier=SVCDF(gamma="scale"), preprocessing=None - ), - learner_parameters={"kernel": ["linear", "rbf"], "C": [1, 10]}, - ) - ] + parameter_space = ParameterSpace( + ClassifierPipelineDF(classifier=SVCDF(gamma="scale"), preprocessing=None) + ) + parameter_space.classifier.kernel = ["linear", "rbf"] + parameter_space.classifier.C = [1, 10] # load scikit-learn test-data and convert to pd iris = datasets.load_iris() @@ -177,24 +164,32 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: ) test_sample: Sample = Sample(observations=test_data, target_name="target") - model_ranker: LearnerRanker[ClassifierPipelineDF[SVCDF]] = LearnerRanker( - grids=models, cv=cv, n_jobs=n_jobs - ).fit(sample=test_sample) + model_ranker: LearnerRanker2[ + ClassifierPipelineDF[SVCDF], GridSearchCV + ] = LearnerRanker2( + searcher_factory=GridSearchCV, + parameter_space=parameter_space, + cv=cv, + n_jobs=n_jobs, + ).fit( + sample=test_sample + ) - log.debug(f"\n{model_ranker.summary_report()}") + summary_report = model_ranker.summary_report() + 
log.debug(f"\n{summary_report}") check_ranking( - ranking=model_ranker.ranking_, + ranking=summary_report, + is_classifier=True, expected_scores=expected_learner_scores, - expected_learners=[SVCDF] * 4, expected_parameters={ - 0: dict(classifier__C=10, classifier__kernel="linear"), - 3: dict(classifier__C=1, classifier__kernel="rbf"), + 0: dict(C=10, kernel="linear"), + 3: dict(C=1, kernel="rbf"), }, ) assert ( - model_ranker.ranking_[0].ranking_score >= 0.8 + summary_report["mean_test_score"].iloc[0] >= 0.8 ), "expected a best performance of at least 0.8" @@ -332,7 +327,7 @@ def test_learner_ranker( ): LearnerRanker2(GridSearchCV, regressor_parameters, param_grid=None) - ranker: LearnerRanker2[RegressorPipelineDF] = LearnerRanker2( + ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV] = LearnerRanker2( GridSearchCV, regressor_parameters, scoring="r2", From 50a5bf81287f17415b73c472a1b6f422ece72c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Fri, 21 Jan 2022 15:05:29 +0100 Subject: [PATCH 053/106] API: enable tests for new LearnerRanker --- test/test/conftest.py | 48 +++++++++++++++--- test/test/facet/test_crossfit.py | 28 ++++++----- test/test/facet/test_inspection.py | 50 +++++++++--------- test/test/facet/test_selection.py | 81 +++++++++++++++++------------- 4 files changed, 128 insertions(+), 79 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index c0b1f0eb..08d252d4 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -8,7 +8,7 @@ from sklearn.model_selection import BaseCrossValidator, GridSearchCV, KFold from sklearn.utils import Bunch -from sklearndf import TransformerDF +from sklearndf import LearnerDF, TransformerDF from sklearndf.classification import RandomForestClassifierDF from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import ( @@ -181,10 +181,17 @@ def best_lgbm_model( ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test # performance - candidates = regressor_ranker.summary_report()["param_candidate"].iloc[:, 0] - best_lgbm_model = candidates[ - candidates.apply(lambda x: isinstance(x.regressor, LGBMRegressorDF)) - ][0] + candidates = regressor_ranker.summary_report()["param_candidate"] + best_lgbm_model_df = candidates[ + candidates.apply( + lambda x: isinstance(x.iloc[0].regressor, LGBMRegressorDF), axis=1 + ) + ].iloc[0] + + best_lgbm_model = best_lgbm_model_df[0] + best_lgbm_model.regressor.set_params( + **best_lgbm_model_df["regressor"].dropna().to_dict() + ) return best_lgbm_model.fit(X=sample.features, y=sample.target) @@ -317,6 +324,7 @@ def check_ranking( is_classifier: bool, expected_scores: Sequence[float], expected_parameters: Optional[Mapping[int, Mapping[str, Any]]], + expected_learners: Optional[List[LearnerDF]] = None, ) -> None: """ Test helper to check rankings produced by learner rankers @@ -325,10 +333,27 @@ def check_ranking( :param is_classifier: flag if ranking was performed on classifiers, or regressors :param expected_scores: expected ranking scores, rounded to 3 decimal places :param expected_parameters: expected learner parameters + :param expected_learners: optional list of expected learners. Should be present + only for multi estimator search. 
:return: None """ SCORE_COLUMN = "mean_test_score" + CLASSIFIER_STR = "classifier" + REGRESSOR_STR = "regressor" + PARAM_CANDIDATE_STR = "param_candidate" + + def _select_parameters( + param_column: str, rank: int, learner_str: Optional[str] + ) -> Tuple[dict, Optional[LearnerDF]]: + if param_column == PARAM_CANDIDATE_STR: + raw_parameters = ranking[param_column][learner_str].iloc[rank].to_dict() + return ( + {k: v for k, v in raw_parameters.items() if v is not np.nan}, + ranking[param_column].iloc[:, 0].iloc[rank], + ) + else: + return ranking[param_column].iloc[rank].to_dict(), None for rank, score_expected in enumerate(expected_scores): score_actual = round(ranking[SCORE_COLUMN].iloc[rank], 3) @@ -337,15 +362,24 @@ def check_ranking( f"got {score_actual} but expected {score_expected}" ) - param_column = "param_classifier" if is_classifier else "param_regressor" + learner_str = CLASSIFIER_STR if is_classifier else REGRESSOR_STR + param_column = f"param_{learner_str}" + if expected_learners is not None: + param_column = PARAM_CANDIDATE_STR if expected_parameters is not None: for rank, parameters_expected in expected_parameters.items(): - parameters_actual = ranking[param_column].iloc[rank].to_dict() + parameters_actual, learner_actual = _select_parameters( + param_column, rank, learner_str + ) assert parameters_actual == parameters_expected, ( f"unexpected parameters for learner at rank #{rank}: " f"got {parameters_actual} but expected {parameters_expected}" ) + if learner_actual is not None: + assert isinstance( + getattr(learner_actual, learner_str), expected_learners[rank] + ) @pytest.fixture diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index 30e47127..41120707 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -7,7 +7,7 @@ from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import RandomForestRegressorDF -# from ..conftest import check_ranking +from ..conftest import check_ranking from facet.selection import ( LearnerRanker2, MultiClassifierParameterSpace, @@ -22,7 +22,7 @@ def test_prediction_classifier( iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int ) -> None: - # expected_learner_scores = [0.889, 0.886, 0.885, 0.879] + expected_learner_scores = [0.889, 0.886, 0.885, 0.879] # define parameters and crossfit ps1 = ParameterSpace( @@ -66,14 +66,16 @@ def test_prediction_classifier( model_ranker.fit(sample=iris_sample_multi_class) - log.debug(f"\n{model_ranker.summary_report()}") - - # check_ranking( - # ranking=model_ranker.ranking_, - # expected_scores=expected_learner_scores, - # expected_learners=[RandomForestClassifierDF] * 4, - # expected_parameters={ - # 2: dict(classifier__min_samples_leaf=32, classifier__n_estimators=50), - # 3: dict(classifier__min_samples_leaf=32, classifier__n_estimators=80), - # }, - # ) + ranking = model_ranker.summary_report() + + log.debug(f"\n{ranking}") + + check_ranking( + ranking=ranking, + is_classifier=True, + expected_scores=expected_learner_scores, + expected_parameters={ + 2: dict(min_samples_leaf=32, n_estimators=50), + 3: dict(min_samples_leaf=32, n_estimators=80), + }, + ) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 76d92eb1..0ac10243 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -21,7 +21,7 @@ ) from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF -# from ..conftest import 
check_ranking +from ..conftest import check_ranking from facet.data import Sample from facet.inspection import ( KernelExplainerFactory, @@ -48,18 +48,19 @@ def test_model_inspection( n_jobs: int, ) -> None: + ranking = regressor_ranker.summary_report() + # define checksums for this test - log.debug(f"\n{regressor_ranker.summary_report()}") - - # TODO adjust - # check_ranking( - # ranking=regressor_ranker.ranking_, - # expected_scores=( - # [0.418, 0.400, 0.386, 0.385, 0.122, 0.122, -0.074, -0.074, -0.074, -0.074] - # ), - # expected_learners=None, - # expected_parameters=None, - # ) + log.debug(f"\n{ranking}") + + check_ranking( + ranking=ranking, + is_classifier=False, + expected_scores=( + [0.693, 0.689, 0.677, 0.661, 0.615, 0.615, 0.367, 0.281, 0.281, 0.281] + ), + expected_parameters=None, + ) shap_values: pd.DataFrame = regressor_inspector.shap_values() @@ -101,20 +102,21 @@ def test_model_inspection( def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: - # expected_learner_scores = [0.872, 0.868, 0.866, 0.859] + expected_learner_scores = [0.872, 0.868, 0.866, 0.859] + + ranking = iris_classifier_ranker_binary.summary_report() - log.debug(f"\n{iris_classifier_ranker_binary.summary_report()}") + log.debug(f"\n{ranking}") - # TODO adjust - # check_ranking( - # ranking=iris_classifier_ranker_binary.ranking_, - # expected_scores=expected_learner_scores, - # expected_learners=[RandomForestClassifierDF] * 4, - # expected_parameters={ - # 2: dict(classifier__min_samples_leaf=4, classifier__n_estimators=10), - # 3: dict(classifier__min_samples_leaf=8, classifier__n_estimators=10), - # }, - # ) + check_ranking( + ranking=ranking, + is_classifier=True, + expected_scores=expected_learner_scores, + expected_parameters={ + 2: dict(min_samples_leaf=4, n_estimators=10), + 3: dict(min_samples_leaf=8, n_estimators=10), + }, + ) # noinspection DuplicatedCode diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index c136e515..90e78904 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -16,7 +16,9 @@ from sklearndf import TransformerDF from sklearndf.classification import SVCDF from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF -from sklearndf.regression import ( # AdaBoostRegressorDF,; LinearRegressionDF, +from sklearndf.regression import ( + AdaBoostRegressorDF, + LinearRegressionDF, RandomForestRegressorDF, ) from sklearndf.regression.extra import LGBMRegressorDF @@ -87,26 +89,36 @@ def test_model_ranker( regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int ) -> None: - # TODO adjust - # expected_scores = [0.745, 0.742, 0.7, 0.689, 0.675, 0.675, 0.61, 0.61, 0.61, 0.61] - # expected_learners = [ - # RandomForestRegressorDF, - # RandomForestRegressorDF, - # AdaBoostRegressorDF, - # AdaBoostRegressorDF, - # LinearRegressionDF, - # LinearRegressionDF, - # LGBMRegressorDF, - # LGBMRegressorDF, - # LGBMRegressorDF, - # LGBMRegressorDF, - # ] - # expected_parameters = { - # 0: dict(regressor__n_estimators=80, regressor__random_state=42), - # 1: dict(regressor__n_estimators=50, regressor__random_state=42), - # 2: dict(regressor__n_estimators=50, regressor__random_state=42), - # 3: dict(regressor__n_estimators=80, regressor__random_state=42), - # } + expected_scores = [ + 0.840, + 0.837, + 0.812, + 0.812, + 0.793, + 0.790, + 0.758, + 0.758, + 0.758, + 0.758, + ] + expected_learners = [ + RandomForestRegressorDF, + RandomForestRegressorDF, + 
LinearRegressionDF, + LinearRegressionDF, + AdaBoostRegressorDF, + AdaBoostRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + ] + expected_parameters = { + 0: dict(n_estimators=80), + 1: dict(n_estimators=50), + 4: dict(n_estimators=50), + 5: dict(n_estimators=80), + } # define the circular cross validator with just 5 splits (to speed up testing) cv = BootstrapCV(n_splits=5, random_state=42) @@ -124,22 +136,21 @@ def test_model_ranker( assert isinstance(ranker.best_estimator_.steps[0][1], RegressorPipelineDF) ranking = ranker.summary_report() + ranking_score = ranking["mean_test_score"] assert len(ranking) > 0 - # TODO adjust - # assert isinstance(ranking[0], LearnerEvaluation) - # assert all( - # ranking_hi.ranking_score >= ranking_lo.ranking_score - # for ranking_hi, ranking_lo in zip(ranking, ranking[1:]) - # ) - - # TODO adjust - # check_ranking( - # ranking=ranker.ranking_, - # expected_scores=expected_scores, - # expected_learners=expected_learners, - # expected_parameters=expected_parameters, - # ) + assert all( + ranking_hi >= ranking_lo + for ranking_hi, ranking_lo in zip(ranking_score, ranking_score[1:]) + ) + + check_ranking( + ranking=ranking, + is_classifier=False, + expected_scores=expected_scores, + expected_parameters=expected_parameters, + expected_learners=expected_learners, + ) def test_model_ranker_no_preprocessing(n_jobs) -> None: From c0b8017f3191f13dcc938ddc3e504263003f68ab Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 21 Jan 2022 15:13:08 +0100 Subject: [PATCH 054/106] REFACTOR: remove obsolete function _learner_type() --- src/facet/selection/_selection.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 6f591bd2..cc748a34 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -21,7 +21,6 @@ Optional, Sequence, Tuple, - Type, TypeVar, Union, ) @@ -34,11 +33,7 @@ from pytools.api import AllTracker, deprecated, inheritdoc from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin -from sklearndf.pipeline import ( - ClassifierPipelineDF, - LearnerPipelineDF, - RegressorPipelineDF, -) +from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from facet.data import Sample from facet.selection.base import BaseParameterSpace @@ -518,18 +513,4 @@ def __init__( self.ranking_score = ranking_score -def _learner_type(pipeline: T_LearnerPipelineDF) -> Type[T_LearnerPipelineDF]: - # determine whether a learner pipeline fits a regressor or a classifier - for learner_type in [RegressorPipelineDF, ClassifierPipelineDF]: - if isinstance(pipeline, learner_type): - return learner_type - if isinstance(pipeline, LearnerPipelineDF): - raise TypeError(f"unknown learner pipeline type: {type(learner_type).__name__}") - else: - raise TypeError( - "attribute grid.pipeline is not a learner pipeline: " - f"{type(learner_type).__name__}" - ) - - __tracker.validate() From 0fb3f81e872cce3f3505e88fde45a57eb5952679 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 21 Jan 2022 15:58:19 +0100 Subject: [PATCH 055/106] REFACTOR: rename LearnerRanker2 to LearnerRanker --- src/facet/selection/_selection.py | 10 +++++----- test/test/conftest.py | 22 +++++++++++----------- test/test/facet/test_crossfit.py | 10 +++------- test/test/facet/test_inspection.py | 6 +++--- test/test/facet/test_selection.py | 12 ++++++------ 5 files changed, 28 insertions(+), 32 
deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index cc748a34..3e021af9 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -40,7 +40,7 @@ log = logging.getLogger(__name__) -__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker2"] +__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"] # # Type constants @@ -85,7 +85,7 @@ @inheritdoc(match="[see superclass]") -class LearnerRanker2( +class LearnerRanker( FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF, T_SearchCV] ): """ @@ -230,7 +230,7 @@ def fit( :param fit_params: any fit parameters to pass on to the learner's fit method :return: ``self`` """ - self: LearnerRanker2[ + self: LearnerRanker[ T_LearnerPipelineDF, T_SearchCV ] # support type hinting in PyCharm @@ -352,7 +352,7 @@ class LearnerGrid(Generic[T_LearnerPipelineDF]): A grid of hyper-parameters for tuning a learner pipeline. """ - @deprecated(message=f"use class {LearnerRanker2.__name__} instead") + @deprecated(message=f"use class {LearnerRanker.__name__} instead") def __init__( self, pipeline: T_LearnerPipelineDF, @@ -475,7 +475,7 @@ class LearnerEvaluation(Generic[T_LearnerPipelineDF]): __slots__ = ["pipeline", "parameters", "scoring_name", "scores", "ranking_score"] - @deprecated(message=f"use class {LearnerRanker2.__name__} instead") + @deprecated(message=f"use class {LearnerRanker.__name__} instead") def __init__( self, pipeline: T_LearnerPipelineDF, diff --git a/test/test/conftest.py b/test/test/conftest.py index 08d252d4..b047defb 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -29,7 +29,7 @@ import facet from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory -from facet.selection import LearnerRanker2, MultiRegressorParameterSpace, ParameterSpace +from facet.selection import LearnerRanker, MultiRegressorParameterSpace, ParameterSpace from facet.validation import BootstrapCV, StratifiedBootstrapCV logging.basicConfig(level=logging.DEBUG) @@ -164,8 +164,8 @@ def regressor_ranker( regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int, -) -> LearnerRanker2[RegressorPipelineDF, GridSearchCV]: - return LearnerRanker2( +) -> LearnerRanker[RegressorPipelineDF, GridSearchCV]: + return LearnerRanker( searcher_factory=GridSearchCV, parameter_space=regressor_parameters, cv=cv_kfold, @@ -176,7 +176,7 @@ def regressor_ranker( @pytest.fixture def best_lgbm_model( - regressor_ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV], + regressor_ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV], sample: Sample, ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test @@ -387,7 +387,7 @@ def iris_classifier_ranker_binary( iris_sample_binary: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -398,7 +398,7 @@ def iris_classifier_ranker_multi_class( iris_sample_multi_class: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( 
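# (A sketch of the renamed API, not part of this patch; it uses only arguments
# that already appear in the fixtures in this file:
#
#     ranker = LearnerRanker(
#         searcher_factory=GridSearchCV,
#         parameter_space=parameter_space,
#         cv=cv,
#         scoring="r2",
#         n_jobs=n_jobs,
#     ).fit(sample=sample)
#     report = ranker.summary_report()
#
# Only the class name changes in this commit; the constructor arguments and
# the fit/summary_report workflow stay as they were.)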
sample=iris_sample_multi_class, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -407,7 +407,7 @@ def iris_classifier_ranker_multi_class( @pytest.fixture def iris_classifier_ranker_dual_target( iris_sample_binary_dual_target: Sample, cv_bootstrap: BootstrapCV, n_jobs: int -) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary_dual_target, cv=cv_bootstrap, n_jobs=n_jobs ) @@ -415,14 +415,14 @@ def iris_classifier_ranker_dual_target( @pytest.fixture def iris_classifier_binary( - iris_classifier_ranker_binary: LearnerRanker2[ClassifierPipelineDF, GridSearchCV], + iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF, GridSearchCV], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: return iris_classifier_ranker_binary.best_estimator_ @pytest.fixture def iris_classifier_multi_class( - iris_classifier_ranker_multi_class: LearnerRanker2[ + iris_classifier_ranker_multi_class: LearnerRanker[ ClassifierPipelineDF, GridSearchCV ], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: @@ -447,7 +447,7 @@ def iris_inspector_multi_class( def fit_classifier_ranker( sample: Sample, cv: BaseCrossValidator, n_jobs: int -) -> LearnerRanker2[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: # define the parameter space parameter_space = ParameterSpace( ClassifierPipelineDF( @@ -460,7 +460,7 @@ def fit_classifier_ranker( # pipeline inspector only supports binary classification, # therefore filter the sample down to only 2 target classes - return LearnerRanker2( + return LearnerRanker( searcher_factory=GridSearchCV, parameter_space=parameter_space, cv=cv, diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index 41120707..dadef120 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -8,11 +8,7 @@ from sklearndf.regression import RandomForestRegressorDF from ..conftest import check_ranking -from facet.selection import ( - LearnerRanker2, - MultiClassifierParameterSpace, - ParameterSpace, -) +from facet.selection import LearnerRanker, MultiClassifierParameterSpace, ParameterSpace from facet.validation import StratifiedBootstrapCV log = logging.getLogger(__name__) @@ -46,9 +42,9 @@ def test_prediction_classifier( # define an illegal grid list, mixing classification with regression MultiClassifierParameterSpace(ps1, ps2) - model_ranker: LearnerRanker2[ + model_ranker: LearnerRanker[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV - ] = LearnerRanker2( + ] = LearnerRanker( searcher_factory=GridSearchCV, parameter_space=ps1, cv=cv_stratified_bootstrap, diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 0ac10243..e8f28dcd 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -28,7 +28,7 @@ LearnerInspector, TreeExplainerFactory, ) -from facet.selection import LearnerRanker2 +from facet.selection import LearnerRanker # noinspection PyMissingOrEmptyDocstring @@ -38,7 +38,7 @@ def test_model_inspection( - regressor_ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV], + regressor_ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV], best_lgbm_model: RegressorPipelineDF, preprocessed_feature_names, regressor_inspector: LearnerInspector, @@ -618,7 +618,7 @@ def 
test_model_inspection_classifier_interaction( def test_model_inspection_classifier_interaction_dual_target( iris_sample_binary_dual_target: Sample, - iris_classifier_ranker_dual_target: LearnerRanker2[ + iris_classifier_ranker_dual_target: LearnerRanker[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV ], iris_target_name, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 90e78904..15449ae9 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -27,7 +27,7 @@ from facet.data import Sample from facet.selection import ( LearnerGrid, - LearnerRanker2, + LearnerRanker, MultiClassifierParameterSpace, MultiRegressorParameterSpace, ParameterSpace, @@ -123,7 +123,7 @@ def test_model_ranker( # define the circular cross validator with just 5 splits (to speed up testing) cv = BootstrapCV(n_splits=5, random_state=42) - ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV] = LearnerRanker2( + ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV] = LearnerRanker( searcher_factory=GridSearchCV, parameter_space=regressor_parameters, cv=cv, @@ -175,9 +175,9 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: ) test_sample: Sample = Sample(observations=test_data, target_name="target") - model_ranker: LearnerRanker2[ + model_ranker: LearnerRanker[ ClassifierPipelineDF[SVCDF], GridSearchCV - ] = LearnerRanker2( + ] = LearnerRanker( searcher_factory=GridSearchCV, parameter_space=parameter_space, cv=cv, @@ -336,9 +336,9 @@ def test_learner_ranker( "of arg searcher_factory, but included: param_grid" ), ): - LearnerRanker2(GridSearchCV, regressor_parameters, param_grid=None) + LearnerRanker(GridSearchCV, regressor_parameters, param_grid=None) - ranker: LearnerRanker2[RegressorPipelineDF, GridSearchCV] = LearnerRanker2( + ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV] = LearnerRanker( GridSearchCV, regressor_parameters, scoring="r2", From 38243e823a75c96e3919a9c3930414f28490ab3c Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 21 Jan 2022 17:44:15 +0100 Subject: [PATCH 056/106] REFACTOR: remove classes LearnerGrid and LearnerEvaluation --- src/facet/selection/_selection.py | 177 +----------------------------- test/test/facet/test_selection.py | 49 --------- 2 files changed, 2 insertions(+), 224 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 3e021af9..011bcb9e 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -4,23 +4,16 @@ import inspect import itertools import logging -import operator import re -from functools import reduce -from itertools import chain from re import Pattern -from types import MappingProxyType from typing import ( Any, Callable, Dict, Generic, - Iterable, List, - Mapping, Optional, Sequence, - Tuple, TypeVar, Union, ) @@ -30,7 +23,7 @@ from numpy.random.mtrand import RandomState from sklearn.model_selection import BaseCrossValidator, GridSearchCV -from pytools.api import AllTracker, deprecated, inheritdoc +from pytools.api import AllTracker, inheritdoc from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF @@ -40,7 +33,7 @@ log = logging.getLogger(__name__) -__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"] +__all__ = ["LearnerRanker"] # # Type constants @@ -347,170 +340,4 @@ def _get_searcher_parameters(self) -> Dict[str, Any]: ) -class 
LearnerGrid(Generic[T_LearnerPipelineDF]): - """ - A grid of hyper-parameters for tuning a learner pipeline. - """ - - @deprecated(message=f"use class {LearnerRanker.__name__} instead") - def __init__( - self, - pipeline: T_LearnerPipelineDF, - learner_parameters: Dict[str, Sequence], - preprocessing_parameters: Optional[Dict[str, Sequence]] = None, - ) -> None: - """ - :param pipeline: the :class:`~.sklearndf.pipeline.RegressorPipelineDF` or - :class:`~.sklearndf.pipeline.ClassifierPipelineDF` to which the - hyper-parameters will be applied - :param learner_parameters: the hyper-parameter grid in which to search for the - optimal parameter values for the pipeline's final estimator - :param preprocessing_parameters: the hyper-parameter grid in which to search - for the optimal parameter values for the pipeline's preprocessing pipeline - (optional) - """ - self.pipeline = pipeline - - def _prefix_parameter_names( - parameters: Dict[str, Sequence], prefix: str - ) -> Iterable[Tuple[str, Any]]: - return ( - (f"{prefix}__{param}", values) for param, values in parameters.items() - ) - - grid_parameters: Iterable[Tuple[str, Sequence]] = _prefix_parameter_names( - parameters=learner_parameters, prefix=pipeline.final_estimator_name - ) - - if preprocessing_parameters is not None: - grid_parameters = chain( - grid_parameters, - _prefix_parameter_names( - parameters=preprocessing_parameters, - prefix=pipeline.preprocessing_name, - ), - ) - - self._grid_parameters: List[Tuple[str, Sequence]] = list(grid_parameters) - self._grid_dict: Dict[str, Sequence] = dict(self._grid_parameters) - - @property - def parameters(self) -> Mapping[str, Sequence[Any]]: - """ - The parameter grid for the entire pipeline. - """ - return MappingProxyType(self._grid_dict) - - def __iter__(self) -> Iterable[Dict[str, Any]]: - grid = self._grid_parameters - params: List[Tuple[str, Any]] = [("", None) for _ in grid] - - def _iter_parameter(param_index: int): - if param_index < 0: - yield dict(params) - else: - name, values = grid[param_index] - for value in values: - params[param_index] = (name, value) - yield from _iter_parameter(param_index=param_index - 1) - - yield from _iter_parameter(len(grid) - 1) - - def __getitem__( - self, pos: Union[int, slice] - ) -> Union[Dict[str, Sequence], Sequence[Dict[str, Sequence]]]: - - _len = len(self) - - def _get(i: int) -> Dict[str, Sequence]: - assert i >= 0 - - parameters = self._grid_parameters - result: Dict[str, Sequence] = {} - - for name, values in parameters: - n_values = len(values) - result[name] = values[i % n_values] - i //= n_values - - assert i == 0 - - return result - - def _clip(i: int, i_max: int) -> int: - if i < 0: - return max(_len + i, 0) - else: - return min(i, i_max) - - if isinstance(pos, slice): - return [ - _get(i) - for i in range( - _clip(pos.start or 0, _len - 1), - _clip(pos.stop or _len, _len), - pos.step or 1, - ) - ] - else: - if pos < -_len or pos >= _len: - raise ValueError(f"index out of bounds: {pos}") - return _get(_len + pos if pos < 0 else pos) - - def __len__(self) -> int: - return reduce( - operator.mul, - ( - len(values_for_parameter) - for values_for_parameter in self._grid_dict.values() - ), - ) - - -class LearnerEvaluation(Generic[T_LearnerPipelineDF]): - """ - A collection of scores for a specific parametrization of a learner pipeline, - generated by a :class:`.LearnerRanker`. 
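(For orientation, a sketch that is not part of the patch: the role of the
removed LearnerGrid is taken over by ParameterSpace, which assigns parameter
sets as attributes instead of building prefixed dictionaries, e.g.

    ps = ParameterSpace(
        RegressorPipelineDF(
            regressor=RandomForestRegressorDF(random_state=42),
            preprocessing=simple_preprocessor,
        )
    )
    ps.regressor.max_depth = [3, 4, 5, 7, 10]

while LearnerEvaluation is superseded by the searcher's summary_report() data
frame, as exercised by the tests further down in this series.)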
- """ - - __slots__ = ["pipeline", "parameters", "scoring_name", "scores", "ranking_score"] - - @deprecated(message=f"use class {LearnerRanker.__name__} instead") - def __init__( - self, - pipeline: T_LearnerPipelineDF, - parameters: Mapping[str, Any], - scoring_name: str, - scores: np.ndarray, - ranking_score: float, - ) -> None: - """ - :param pipeline: the unfitted learner pipeline - :param parameters: the hyper-parameters for which the learner pipeline was - scored, as a mapping of parameter names to parameter values - :param scoring_name: the name of the scoring function used to calculate the - scores - :param scores: the scores of all crossfits of the learner pipeline - :param ranking_score: the aggregate score determined by the ranking - metric of :class:`.LearnerRanker`, used for ranking the learners - """ - super().__init__() - - #: The unfitted learner pipeline. - self.pipeline = pipeline - - #: The hyper-parameters for which the learner pipeline was scored. - self.parameters = parameters - - #: The name of the scoring function used to calculate the scores. - self.scoring_name = scoring_name - - #: The scores of all crossfits of the learner pipeline. - self.scores = scores - - #: The aggregate score determined by the ranking metric of - #: :class:`.LearnerRanker`, used for ranking the learners. - self.ranking_score = ranking_score - - __tracker.validate() diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 15449ae9..19078800 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -26,7 +26,6 @@ from ..conftest import check_ranking from facet.data import Sample from facet.selection import ( - LearnerGrid, LearnerRanker, MultiClassifierParameterSpace, MultiRegressorParameterSpace, @@ -37,54 +36,6 @@ log = logging.getLogger(__name__) -def test_parameter_grid() -> None: - - grid = LearnerGrid( - pipeline=ClassifierPipelineDF(classifier=SVCDF(gamma="scale")), - learner_parameters={"a": [1, 2, 3], "b": [11, 12], "c": [21, 22]}, - ) - - grid_expected = [ - {"classifier__a": 1, "classifier__b": 11, "classifier__c": 21}, - {"classifier__a": 2, "classifier__b": 11, "classifier__c": 21}, - {"classifier__a": 3, "classifier__b": 11, "classifier__c": 21}, - {"classifier__a": 1, "classifier__b": 12, "classifier__c": 21}, - {"classifier__a": 2, "classifier__b": 12, "classifier__c": 21}, - {"classifier__a": 3, "classifier__b": 12, "classifier__c": 21}, - {"classifier__a": 1, "classifier__b": 11, "classifier__c": 22}, - {"classifier__a": 2, "classifier__b": 11, "classifier__c": 22}, - {"classifier__a": 3, "classifier__b": 11, "classifier__c": 22}, - {"classifier__a": 1, "classifier__b": 12, "classifier__c": 22}, - {"classifier__a": 2, "classifier__b": 12, "classifier__c": 22}, - {"classifier__a": 3, "classifier__b": 12, "classifier__c": 22}, - ] - - _len = len(grid_expected) - - # length of the grid - assert len(grid) == _len - - # iterating all items in the grid - for item, expected in zip(grid, grid_expected): - assert item == expected - - # positive indices - for i in range(_len): - assert grid[i] == grid_expected[i] - - # negative indices - for i in range(-_len, 0): - assert grid[i] == grid_expected[_len + i] - - # exceptions raised for out-of-bounds indices - with pytest.raises(expected_exception=ValueError): - _ = grid[_len] - _ = grid[-_len - 1] - - # slicing support - assert grid[-10:10:2] == grid_expected[-10:10:2] - - def test_model_ranker( regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: 
int ) -> None: From 266d79e495dab050c05c154e924901bc87f72fd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Mon, 24 Jan 2022 18:25:53 +0100 Subject: [PATCH 057/106] API: set default candidate to None --- src/facet/selection/_parameters.py | 2 +- test/test/facet/test_selection.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index a43ea6fe..556bf46c 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -291,7 +291,7 @@ def __init__( super().__init__( estimator=cls_pipeline( - [(MultiEstimatorParameterSpace.STEP_CANDIDATE, candidates[0].estimator)] + [(MultiEstimatorParameterSpace.STEP_CANDIDATE, None)] ) ) diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 19078800..918bcf30 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -230,7 +230,7 @@ def test_parameter_space( assert freeze(mps.to_expression()) == freeze( Id.MultiRegressorParameterSpace( - Id.PipelineDF(steps=[("candidate", pipeline_1.to_expression())]), + Id.PipelineDF(steps=[("candidate", None)]), [ Id.ParameterSpace( candidate=pipeline_1.to_expression(), @@ -256,7 +256,7 @@ def test_parameter_space( ) assert type(mps.estimator) == PipelineDF - assert mps.estimator.steps == [("candidate", pipeline_1)] + assert mps.estimator.steps == [("candidate", None)] assert mps.parameters == [ { From 282ad4e34031e5c4e6a5c890ca17deff74c29680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Tue, 25 Jan 2022 14:13:55 +0100 Subject: [PATCH 058/106] API: add unlift method to ParameterSpace --- src/facet/selection/_parameters.py | 10 ++++++++++ src/facet/selection/_selection.py | 2 +- src/facet/selection/base/_parameters.py | 11 +++++++++++ test/test/facet/test_selection.py | 4 ++-- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 556bf46c..1166766b 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -149,6 +149,11 @@ def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict: for (name, values) in self._iter_parameters([prefix] if prefix else []) } + @staticmethod + def unlift_estimator(estimator: T_Estimator) -> T_Estimator: + """[see superclass]""" + return estimator + def _validate_parameter(self, name: str, value: ParameterSet) -> None: if name not in self._params: @@ -318,6 +323,11 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: for candidate in self.candidates ] + @staticmethod + def unlift_estimator(estimator: T_Estimator) -> T_Estimator: + """[see superclass]""" + return estimator.steps[0][1] + def to_expression(self) -> "Expression": """[see superclass]""" # noinspection PyProtectedMember diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 011bcb9e..719d7f67 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -199,7 +199,7 @@ def best_estimator_(self) -> T_LearnerPipelineDF: self._ensure_fitted() searcher = self.searcher_ if searcher.refit: - return searcher.best_estimator_ + return self.parameter_space.unlift_estimator(searcher.best_estimator_) else: raise AttributeError( "best_model_ is not defined; use a CV searcher with refit=True" diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py 
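(Background for the change below, sketched rather than authoritative: with a
multi parameter space, the searcher's best_estimator_ is the wrapper pipeline
whose single "candidate" step holds the winning learner pipeline, so the
ranker needs to unwrap it before returning it, conceptually

    best = searcher.best_estimator_      # PipelineDF([("candidate", ...)])
    best_learner = best.steps[0][1]      # the actual learner pipeline

which is what unlift_estimator() encapsulates; for a plain ParameterSpace the
estimator is returned unchanged.)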
index 3670fa14..35b1f0bc 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -91,5 +91,16 @@ def get_parameters( """ pass + @staticmethod + @abstractmethod + def unlift_estimator(estimator: T_Estimator) -> T_Estimator: + """ + Unlift passed estimator from a context. + + :param estimator: estimator in a context + :return: unlifted estimator + """ + pass + __tracker.validate() diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 918bcf30..0bab36ae 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -84,7 +84,7 @@ def test_model_ranker( log.debug(f"\n{ranker.summary_report()}") - assert isinstance(ranker.best_estimator_.steps[0][1], RegressorPipelineDF) + assert isinstance(ranker.best_estimator_, RegressorPipelineDF) ranking = ranker.summary_report() ranking_score = ranking["mean_test_score"] @@ -297,7 +297,7 @@ def test_learner_ranker( n_jobs=n_jobs, ).fit(sample=sample) - assert isinstance(ranker.best_estimator_, PipelineDF) + assert isinstance(ranker.best_estimator_, RegressorPipelineDF) report_df = ranker.summary_report() log.debug(report_df.columns.tolist()) From 7fb158487c211962b0fa7695ea4a19d1094d6986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Thu, 27 Jan 2022 15:30:38 +0100 Subject: [PATCH 059/106] API: introduce CandidateEstimatorDF --- src/facet/selection/_parameters.py | 107 +++++++++++++++++------- src/facet/selection/_selection.py | 2 +- src/facet/selection/base/_parameters.py | 11 --- test/test/facet/test_selection.py | 7 +- 4 files changed, 79 insertions(+), 48 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 1166766b..3298e86d 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -4,6 +4,7 @@ import logging import warnings +from abc import ABCMeta from typing import ( Any, Collection, @@ -12,6 +13,7 @@ Iterable, Iterator, List, + Mapping, Optional, Set, Tuple, @@ -20,21 +22,22 @@ Union, ) +import pandas as pd from scipy import stats from sklearn.base import BaseEstimator -from sklearn.pipeline import Pipeline from pytools.api import AllTracker, inheritdoc, subsdoc, to_list, validate_element_types from pytools.expression import Expression, make_expression from pytools.expression.atomic import Id from sklearndf import EstimatorDF -from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF +from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from .base import BaseParameterSpace log = logging.getLogger(__name__) __all__ = [ + "CandidateEstimatorDF", "MultiClassifierParameterSpace", "MultiEstimatorParameterSpace", "MultiRegressorParameterSpace", @@ -57,6 +60,7 @@ # Type variables # +T_Self = TypeVar("T_Self") T_Estimator = TypeVar("T_Estimator", bound=BaseEstimator) @@ -120,7 +124,7 @@ def __init__(self, estimator: T_Estimator) -> None: :param estimator: the estimator to which to apply the parameters to """ - super().__init__(estimator=estimator) + super().__init__(estimator=CandidateEstimatorDF(estimator)) params: Dict[str, Any] = { name: param @@ -149,16 +153,12 @@ def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict: for (name, values) in self._iter_parameters([prefix] if prefix else []) } - @staticmethod - def unlift_estimator(estimator: T_Estimator) -> T_Estimator: - """[see superclass]""" - return estimator - def _validate_parameter(self, name: str, value: 
ParameterSet) -> None: if name not in self._params: raise AttributeError( - f"unknown parameter name for {type(self.estimator).__name__}: {name}" + f"unknown parameter name for " + f"{type(self.estimator.candidate).__name__}: {name}" ) if not ( @@ -248,10 +248,10 @@ def _values_to_expression(values: ParameterSet) -> Expression: if path_prefix: return Id(type(self))( - **{".".join(path_prefix): self.estimator}, **parameters + **{".".join(path_prefix): self.estimator.candidate}, **parameters ) else: - return Id(type(self))(self.estimator, **parameters) + return Id(type(self))(self.estimator.candidate, **parameters) @inheritdoc(match="""[see superclass]""") @@ -287,18 +287,7 @@ def __init__( if len(candidates) == 0: raise TypeError("no parameter space passed; need to pass at least one") - if all( - isinstance(candidate.estimator, EstimatorDF) for candidate in candidates - ): - cls_pipeline = PipelineDF - else: - cls_pipeline = Pipeline - - super().__init__( - estimator=cls_pipeline( - [(MultiEstimatorParameterSpace.STEP_CANDIDATE, None)] - ) - ) + super().__init__(estimator=CandidateEstimatorDF()) self.candidates = candidates self.estimator_type = estimator_type @@ -315,7 +304,9 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" return [ { - MultiEstimatorParameterSpace.STEP_CANDIDATE: [candidate.estimator], + MultiEstimatorParameterSpace.STEP_CANDIDATE: [ + candidate.estimator.candidate + ], **candidate.get_parameters( prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE ), @@ -323,16 +314,11 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: for candidate in self.candidates ] - @staticmethod - def unlift_estimator(estimator: T_Estimator) -> T_Estimator: - """[see superclass]""" - return estimator.steps[0][1] - def to_expression(self) -> "Expression": """[see superclass]""" # noinspection PyProtectedMember return Id(type(self))( - self.estimator, + self.estimator.candidate, [ candidate._to_expression( path_prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE @@ -372,6 +358,63 @@ def __init__( super().__init__(*candidates, estimator_type=estimator_type) +@inheritdoc(match="""[see superclass]""") +class CandidateEstimatorDF(EstimatorDF, metaclass=ABCMeta): + """ + Wrapper class providing proper unboxing of `estimator` member for + :class:`.ParameterSpace` and class:`.MultiEstimatorParameterSpace` classes. 
+ """ + + def __init__(self, candidate: Optional[EstimatorDF] = None) -> None: + """ + :param candidate: estimator to be wrapped as candidate + """ + self.candidate = candidate + + def fit( + self: T_Self, + X: pd.DataFrame, + y: Optional[Union[pd.Series, pd.DataFrame]] = None, + **fit_params: Any, + ) -> T_Self: + """[see superclass]""" + self.candidate.fit(X, y, **fit_params) + return self + + @property + def is_fitted(self) -> bool: + """[see superclass]""" + return False if self.candidate is None else self.candidate.is_fitted + + def _get_features_in(self) -> pd.Index: + return self.candidate._get_features_in() + + def _get_n_outputs(self) -> int: + return self.candidate._get_n_outputs() + + def get_params(self, deep: bool = True) -> Mapping[str, Any]: + """[see superclass]""" + return { + "candidate": self.candidate, + } + + def set_params(self, **params: Any) -> Any: + """[see superclass]""" + if "candidate" in params: + self.candidate = params["candidate"] + del params["candidate"] + params = {k[11:]: v for k, v in params.items()} + self.candidate.set_params(**params) + return self + + def to_expression(self) -> "Expression": + """[see superclass]""" + return self.candidate.to_expression() + + def __getattr__(self, item): + return getattr(self.candidate, item) + + __tracker.validate() @@ -409,9 +452,9 @@ def validate_candidates( """ non_compliant_candidate_estimators: Set[str] = { - type(candidate.estimator).__name__ + type(candidate.estimator.candidate).__name__ for candidate in candidates - if not isinstance(candidate.estimator, expected_estimator_type) + if not isinstance(candidate.estimator.candidate, expected_estimator_type) } if non_compliant_candidate_estimators: raise TypeError( diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 719d7f67..57bfa29f 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -199,7 +199,7 @@ def best_estimator_(self) -> T_LearnerPipelineDF: self._ensure_fitted() searcher = self.searcher_ if searcher.refit: - return self.parameter_space.unlift_estimator(searcher.best_estimator_) + return searcher.best_estimator_.candidate else: raise AttributeError( "best_model_ is not defined; use a CV searcher with refit=True" diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index 35b1f0bc..3670fa14 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -91,16 +91,5 @@ def get_parameters( """ pass - @staticmethod - @abstractmethod - def unlift_estimator(estimator: T_Estimator) -> T_Estimator: - """ - Unlift passed estimator from a context. 
- - :param estimator: estimator in a context - :return: unlifted estimator - """ - pass - __tracker.validate() diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 0bab36ae..06b2813f 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -15,7 +15,7 @@ from pytools.expression.atomic import Id from sklearndf import TransformerDF from sklearndf.classification import SVCDF -from sklearndf.pipeline import ClassifierPipelineDF, PipelineDF, RegressorPipelineDF +from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import ( AdaBoostRegressorDF, LinearRegressionDF, @@ -230,7 +230,7 @@ def test_parameter_space( assert freeze(mps.to_expression()) == freeze( Id.MultiRegressorParameterSpace( - Id.PipelineDF(steps=[("candidate", None)]), + None, [ Id.ParameterSpace( candidate=pipeline_1.to_expression(), @@ -255,8 +255,7 @@ def test_parameter_space( ) ) - assert type(mps.estimator) == PipelineDF - assert mps.estimator.steps == [("candidate", None)] + assert mps.estimator.candidate is None assert mps.parameters == [ { From a2eb45b0066a40092b555c3369383a2a136f9324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Thu, 27 Jan 2022 17:55:46 +0100 Subject: [PATCH 060/106] API: rename candidate to raw_estimator --- src/facet/selection/_parameters.py | 47 +++++++++++++++++------------- src/facet/selection/_selection.py | 2 +- test/test/facet/test_selection.py | 2 +- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 3298e86d..962c2b67 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -158,7 +158,7 @@ def _validate_parameter(self, name: str, value: ParameterSet) -> None: if name not in self._params: raise AttributeError( f"unknown parameter name for " - f"{type(self.estimator.candidate).__name__}: {name}" + f"{type(self.estimator.raw_estimator).__name__}: {name}" ) if not ( @@ -248,10 +248,10 @@ def _values_to_expression(values: ParameterSet) -> Expression: if path_prefix: return Id(type(self))( - **{".".join(path_prefix): self.estimator.candidate}, **parameters + **{".".join(path_prefix): self.estimator.raw_estimator}, **parameters ) else: - return Id(type(self))(self.estimator.candidate, **parameters) + return Id(type(self))(self.estimator.raw_estimator, **parameters) @inheritdoc(match="""[see superclass]""") @@ -287,7 +287,7 @@ def __init__( if len(candidates) == 0: raise TypeError("no parameter space passed; need to pass at least one") - super().__init__(estimator=CandidateEstimatorDF()) + super().__init__(estimator=CandidateEstimatorDF.empty()) self.candidates = candidates self.estimator_type = estimator_type @@ -305,7 +305,7 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: return [ { MultiEstimatorParameterSpace.STEP_CANDIDATE: [ - candidate.estimator.candidate + candidate.estimator.raw_estimator ], **candidate.get_parameters( prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE @@ -318,7 +318,7 @@ def to_expression(self) -> "Expression": """[see superclass]""" # noinspection PyProtectedMember return Id(type(self))( - self.estimator.candidate, + self.estimator.raw_estimator, [ candidate._to_expression( path_prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE @@ -367,9 +367,16 @@ class CandidateEstimatorDF(EstimatorDF, metaclass=ABCMeta): def __init__(self, candidate: Optional[EstimatorDF] = None) -> None: 
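        # (Design note, as an illustration only: keeping the candidate behind a
        # fixed attribute means scikit-learn's set_params() machinery can swap
        # the whole estimator like any other hyperparameter, e.g.
        #
        #     wrapper = CandidateEstimatorDF()
        #     wrapper.set_params(candidate=some_pipeline)
        #     wrapper.set_params(candidate__regressor__max_depth=5)
        #
        # which matches the "candidate" and "candidate__..." keys that
        # get_parameters() produces elsewhere in this series; some_pipeline is
        # a placeholder name.)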
""" - :param candidate: estimator to be wrapped as candidate + :param candidate: candidate estimator to be wrapped """ - self.candidate = candidate + self.raw_estimator = candidate + + @classmethod + def empty(cls) -> "CandidateEstimatorDF": + """ + :return: new instance with an empty candidate + """ + return cls() def fit( self: T_Self, @@ -378,41 +385,41 @@ def fit( **fit_params: Any, ) -> T_Self: """[see superclass]""" - self.candidate.fit(X, y, **fit_params) + self.raw_estimator.fit(X, y, **fit_params) return self @property def is_fitted(self) -> bool: """[see superclass]""" - return False if self.candidate is None else self.candidate.is_fitted + return False if self.raw_estimator is None else self.raw_estimator.is_fitted def _get_features_in(self) -> pd.Index: - return self.candidate._get_features_in() + return self.raw_estimator._get_features_in() def _get_n_outputs(self) -> int: - return self.candidate._get_n_outputs() + return self.raw_estimator._get_n_outputs() def get_params(self, deep: bool = True) -> Mapping[str, Any]: """[see superclass]""" return { - "candidate": self.candidate, + "candidate": self.raw_estimator, } def set_params(self, **params: Any) -> Any: """[see superclass]""" if "candidate" in params: - self.candidate = params["candidate"] + self.raw_estimator = params["candidate"] del params["candidate"] - params = {k[11:]: v for k, v in params.items()} - self.candidate.set_params(**params) + params = {k[len("candidate__") :]: v for k, v in params.items()} + self.raw_estimator.set_params(**params) return self def to_expression(self) -> "Expression": """[see superclass]""" - return self.candidate.to_expression() + return self.raw_estimator.to_expression() def __getattr__(self, item): - return getattr(self.candidate, item) + return getattr(self.raw_estimator, item) __tracker.validate() @@ -452,9 +459,9 @@ def validate_candidates( """ non_compliant_candidate_estimators: Set[str] = { - type(candidate.estimator.candidate).__name__ + type(candidate.estimator.raw_estimator).__name__ for candidate in candidates - if not isinstance(candidate.estimator.candidate, expected_estimator_type) + if not isinstance(candidate.estimator.raw_estimator, expected_estimator_type) } if non_compliant_candidate_estimators: raise TypeError( diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 57bfa29f..ccb145f6 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -199,7 +199,7 @@ def best_estimator_(self) -> T_LearnerPipelineDF: self._ensure_fitted() searcher = self.searcher_ if searcher.refit: - return searcher.best_estimator_.candidate + return searcher.best_estimator_.raw_estimator else: raise AttributeError( "best_model_ is not defined; use a CV searcher with refit=True" diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 06b2813f..c214b737 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -255,7 +255,7 @@ def test_parameter_space( ) ) - assert mps.estimator.candidate is None + assert mps.estimator.raw_estimator is None assert mps.parameters == [ { From 9a62324d2faf1a6db3a62778be62f614b099a2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Thu, 27 Jan 2022 18:11:20 +0100 Subject: [PATCH 061/106] API: add scoring preprocessing --- src/facet/selection/_selection.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/facet/selection/_selection.py 
b/src/facet/selection/_selection.py index ccb145f6..7dfcd113 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -21,12 +21,17 @@ import numpy as np import pandas as pd from numpy.random.mtrand import RandomState +from sklearn.metrics import check_scoring from sklearn.model_selection import BaseCrossValidator, GridSearchCV from pytools.api import AllTracker, inheritdoc from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin -from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF +from sklearndf.pipeline import ( + ClassifierPipelineDF, + LearnerPipelineDF, + RegressorPipelineDF, +) from facet.data import Sample from facet.selection.base import BaseParameterSpace @@ -180,6 +185,9 @@ def __init__( + ", ".join(unsupported_params) ) + if type(self.scoring) == str: + self.scoring = self._preprocess_scoring(self.scoring) + self.searcher_ = None __init__.__doc__ = __init__.__doc__.replace( @@ -191,6 +199,25 @@ def is_fitted(self) -> bool: """[see superclass]""" return self.searcher_ is not None + @staticmethod + def _preprocess_scoring(scoring: str): + def _score_fn(estimator, X: pd.DataFrame, y: pd.Series): + estimator = estimator.raw_estimator + + if isinstance(estimator, LearnerPipelineDF): + if estimator.preprocessing: + X = estimator.preprocessing.transform(X=X) + estimator = estimator.final_estimator + + scorer = check_scoring( + estimator=estimator.native_estimator, + scoring=scoring, + ) + + return scorer(estimator.native_estimator, X.values, y.values) + + return _score_fn + @property def best_estimator_(self) -> T_LearnerPipelineDF: """ From 77fa9a358d0b1a9c728f04a4cec1f455b23221b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Sat, 29 Jan 2022 14:53:37 +0100 Subject: [PATCH 062/106] API: refactor CandidateEstimatorDF --- src/facet/selection/_parameters.py | 126 +++++++++++++++++------------ src/facet/selection/_selection.py | 4 +- test/test/conftest.py | 23 +++--- test/test/facet/test_selection.py | 2 +- 4 files changed, 91 insertions(+), 64 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 962c2b67..41465d0b 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -4,7 +4,6 @@ import logging import warnings -from abc import ABCMeta from typing import ( Any, Collection, @@ -13,8 +12,8 @@ Iterable, Iterator, List, - Mapping, Optional, + Sequence, Set, Tuple, Type, @@ -29,7 +28,7 @@ from pytools.api import AllTracker, inheritdoc, subsdoc, to_list, validate_element_types from pytools.expression import Expression, make_expression from pytools.expression.atomic import Id -from sklearndf import EstimatorDF +from sklearndf import ClassifierDF, EstimatorDF, RegressorDF, TransformerDF from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from .base import BaseParameterSpace @@ -119,6 +118,8 @@ class ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]): """ + STEP_CANDIDATE = "candidate" + def __init__(self, estimator: T_Estimator) -> None: """ :param estimator: the estimator to which to apply the parameters to @@ -145,7 +146,7 @@ def __init__(self, estimator: T_Estimator) -> None: replacement="", using=BaseParameterSpace.get_parameters, ) - def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict: + def get_parameters(self, prefix: Optional[str] = STEP_CANDIDATE) -> ParameterDict: """[see superclass]""" return { @@ -158,7 +159,7 @@ def 
_validate_parameter(self, name: str, value: ParameterSet) -> None: if name not in self._params: raise AttributeError( f"unknown parameter name for " - f"{type(self.estimator.raw_estimator).__name__}: {name}" + f"{type(self.estimator.candidate).__name__}: {name}" ) if not ( @@ -248,10 +249,10 @@ def _values_to_expression(values: ParameterSet) -> Expression: if path_prefix: return Id(type(self))( - **{".".join(path_prefix): self.estimator.raw_estimator}, **parameters + **{".".join(path_prefix): self.estimator.candidate}, **parameters ) else: - return Id(type(self))(self.estimator.raw_estimator, **parameters) + return Id(type(self))(self.estimator.candidate, **parameters) @inheritdoc(match="""[see superclass]""") @@ -266,8 +267,6 @@ class MultiEstimatorParameterSpace( spaces. """ - STEP_CANDIDATE = "candidate" - #: The estimator base type which all candidate estimators must implement. estimator_type: Type[T_Estimator] @@ -304,12 +303,8 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" return [ { - MultiEstimatorParameterSpace.STEP_CANDIDATE: [ - candidate.estimator.raw_estimator - ], - **candidate.get_parameters( - prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE - ), + ParameterSpace.STEP_CANDIDATE: [candidate.estimator.candidate], + **candidate.get_parameters(), } for candidate in self.candidates ] @@ -318,11 +313,9 @@ def to_expression(self) -> "Expression": """[see superclass]""" # noinspection PyProtectedMember return Id(type(self))( - self.estimator.raw_estimator, + self.estimator.candidate, [ - candidate._to_expression( - path_prefix=MultiEstimatorParameterSpace.STEP_CANDIDATE - ) + candidate._to_expression(path_prefix=ParameterSpace.STEP_CANDIDATE) for candidate in self.candidates ], ) @@ -359,17 +352,20 @@ def __init__( @inheritdoc(match="""[see superclass]""") -class CandidateEstimatorDF(EstimatorDF, metaclass=ABCMeta): +class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): """ - Wrapper class providing proper unboxing of `estimator` member for - :class:`.ParameterSpace` and class:`.MultiEstimatorParameterSpace` classes. + Metaclass providing representation for candidate estimator to be used in + hyperparameter search. Unifies evaluation approach for :class:`.ParameterSpace` + and class:`.MultiEstimatorParameterSpace`. For the latter it provides "empty" + candidate where actual estimator is a hyperparameter itself. """ def __init__(self, candidate: Optional[EstimatorDF] = None) -> None: """ - :param candidate: candidate estimator to be wrapped + :param candidate: candidate estimator. 
If None then estimators to be evaluated + should be provided in the parameter grid under "candidate" key """ - self.raw_estimator = candidate + self.candidate = candidate @classmethod def empty(cls) -> "CandidateEstimatorDF": @@ -378,6 +374,47 @@ def empty(cls) -> "CandidateEstimatorDF": """ return cls() + @property + def classes_(self) -> Sequence[Any]: + """[see superclass]""" + return self.candidate.classes_ + + def predict_proba( + self, X: pd.DataFrame, **predict_params: Any + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """[see superclass]""" + return self.candidate.predict_proba(X, **predict_params) + + def predict_log_proba( + self, X: pd.DataFrame, **predict_params: Any + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """[see superclass]""" + return self.candidate.predict_log_proba(X, **predict_params) + + def decision_function( + self, X: pd.DataFrame, **predict_params: Any + ) -> Union[pd.Series, pd.DataFrame]: + """[see superclass]""" + return self.candidate.decision_function(X, **predict_params) + + def score( + self, X: pd.DataFrame, y: pd.Series, sample_weight: Optional[pd.Series] = None + ) -> float: + """[see superclass]""" + return self.candidate.score(X, y, sample_weight) + + def predict( + self, X: pd.DataFrame, **predict_params: Any + ) -> Union[pd.Series, pd.DataFrame]: + """[see superclass]""" + return self.candidate.predic(X, **predict_params) + + def fit_predict( + self, X: pd.DataFrame, y: pd.Series, **fit_params: Any + ) -> Union[pd.Series, pd.DataFrame]: + """[see superclass]""" + return self.candidate.fit_predict(X, y, **fit_params) + def fit( self: T_Self, X: pd.DataFrame, @@ -385,41 +422,30 @@ def fit( **fit_params: Any, ) -> T_Self: """[see superclass]""" - self.raw_estimator.fit(X, y, **fit_params) + self.candidate.fit(X, y, **fit_params) return self @property def is_fitted(self) -> bool: """[see superclass]""" - return False if self.raw_estimator is None else self.raw_estimator.is_fitted - - def _get_features_in(self) -> pd.Index: - return self.raw_estimator._get_features_in() - - def _get_n_outputs(self) -> int: - return self.raw_estimator._get_n_outputs() + return False if self.candidate is None else self.candidate.is_fitted - def get_params(self, deep: bool = True) -> Mapping[str, Any]: + def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame: """[see superclass]""" - return { - "candidate": self.raw_estimator, - } + return self.candidate.inverse_transform(X) - def set_params(self, **params: Any) -> Any: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: """[see superclass]""" - if "candidate" in params: - self.raw_estimator = params["candidate"] - del params["candidate"] - params = {k[len("candidate__") :]: v for k, v in params.items()} - self.raw_estimator.set_params(**params) - return self + return self.candidate.transform(X) - def to_expression(self) -> "Expression": - """[see superclass]""" - return self.raw_estimator.to_expression() + def _get_features_in(self) -> pd.Index: + return self.candidate._get_features_in() + + def _get_n_outputs(self) -> int: + return self.candidate._get_n_outputs() - def __getattr__(self, item): - return getattr(self.raw_estimator, item) + def _get_features_original(self) -> pd.Series: + return self.candidate._get_features_original() __tracker.validate() @@ -459,9 +485,9 @@ def validate_candidates( """ non_compliant_candidate_estimators: Set[str] = { - type(candidate.estimator.raw_estimator).__name__ + type(candidate.estimator.candidate).__name__ for candidate in candidates - if not 
isinstance(candidate.estimator.raw_estimator, expected_estimator_type) + if not isinstance(candidate.estimator.candidate, expected_estimator_type) } if non_compliant_candidate_estimators: raise TypeError( diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 7dfcd113..c72b9ce7 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -202,7 +202,7 @@ def is_fitted(self) -> bool: @staticmethod def _preprocess_scoring(scoring: str): def _score_fn(estimator, X: pd.DataFrame, y: pd.Series): - estimator = estimator.raw_estimator + estimator = estimator.candidate if isinstance(estimator, LearnerPipelineDF): if estimator.preprocessing: @@ -226,7 +226,7 @@ def best_estimator_(self) -> T_LearnerPipelineDF: self._ensure_fitted() searcher = self.searcher_ if searcher.refit: - return searcher.best_estimator_.raw_estimator + return searcher.best_estimator_.candidate else: raise AttributeError( "best_model_ is not defined; use a CV searcher with refit=True" diff --git a/test/test/conftest.py b/test/test/conftest.py index b047defb..aa528d06 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -342,18 +342,21 @@ def check_ranking( CLASSIFIER_STR = "classifier" REGRESSOR_STR = "regressor" PARAM_CANDIDATE_STR = "param_candidate" + ESTIMATOR_COLUMN = "" def _select_parameters( param_column: str, rank: int, learner_str: Optional[str] ) -> Tuple[dict, Optional[LearnerDF]]: - if param_column == PARAM_CANDIDATE_STR: - raw_parameters = ranking[param_column][learner_str].iloc[rank].to_dict() - return ( - {k: v for k, v in raw_parameters.items() if v is not np.nan}, - ranking[param_column].iloc[:, 0].iloc[rank], - ) - else: - return ranking[param_column].iloc[rank].to_dict(), None + raw_parameters = ranking[param_column][learner_str].iloc[rank].to_dict() + estimator = ( + ranking[param_column][ESTIMATOR_COLUMN].iloc[rank] + if ESTIMATOR_COLUMN in ranking[param_column] + else None + ) + return ( + {k: v for k, v in raw_parameters.items() if v is not np.nan}, + estimator, + ) for rank, score_expected in enumerate(expected_scores): score_actual = round(ranking[SCORE_COLUMN].iloc[rank], 3) @@ -363,9 +366,7 @@ def _select_parameters( ) learner_str = CLASSIFIER_STR if is_classifier else REGRESSOR_STR - param_column = f"param_{learner_str}" - if expected_learners is not None: - param_column = PARAM_CANDIDATE_STR + param_column = PARAM_CANDIDATE_STR if expected_parameters is not None: for rank, parameters_expected in expected_parameters.items(): diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index c214b737..06b2813f 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -255,7 +255,7 @@ def test_parameter_space( ) ) - assert mps.estimator.raw_estimator is None + assert mps.estimator.candidate is None assert mps.parameters == [ { From f6b21659ff806f3bd95501e9b7f6f6de406901d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Tue, 1 Feb 2022 18:23:04 +0100 Subject: [PATCH 063/106] API: introduce name parameter --- src/facet/selection/_parameters.py | 75 +++++++++++++++++------------- test/test/conftest.py | 2 +- test/test/facet/test_selection.py | 8 +++- 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 41465d0b..feffbc22 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -98,7 +98,8 @@ class 
ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]): RegressorPipelineDF( regressor=RandomForestRegressorDF(random_state=42), preprocessing=simple_preprocessor, - ) + ), + candidate_name="rf_candidate" ) ps.regressor.min_weight_fraction_leaf = scipy.stats.loguniform(0.01, 0.1) ps.regressor.max_depth = [3, 4, 5, 7, 10] @@ -119,13 +120,15 @@ class ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]): """ STEP_CANDIDATE = "candidate" + STEP_NAME = "candidate_name" - def __init__(self, estimator: T_Estimator) -> None: + def __init__(self, estimator: T_Estimator, name: Optional[str] = None) -> None: """ :param estimator: the estimator to which to apply the parameters to + :param name: a name for the estimator to be used in the report data frame """ - super().__init__(estimator=CandidateEstimatorDF(estimator)) + super().__init__(estimator=CandidateEstimatorDF(estimator, name)) params: Dict[str, Any] = { name: param @@ -272,23 +275,23 @@ class MultiEstimatorParameterSpace( def __init__( self, - *candidates: ParameterSpace[T_Estimator], + *spaces: ParameterSpace[T_Estimator], estimator_type: Type[T_Estimator], ) -> None: """ - :param candidates: the parameter spaces from which to select the best estimator + :param spaces: the parameter spaces from which to select the best estimator :param estimator_type: the estimator base type which all candidate estimators must implement """ - validate_element_types(candidates, expected_type=ParameterSpace) - validate_candidates(candidates, expected_estimator_type=estimator_type) + validate_element_types(spaces, expected_type=ParameterSpace) + validate_spaces(spaces, expected_estimator_type=estimator_type) - if len(candidates) == 0: + if len(spaces) == 0: raise TypeError("no parameter space passed; need to pass at least one") super().__init__(estimator=CandidateEstimatorDF.empty()) - self.candidates = candidates + self.spaces = spaces self.estimator_type = estimator_type @subsdoc( @@ -303,10 +306,11 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" return [ { - ParameterSpace.STEP_CANDIDATE: [candidate.estimator.candidate], - **candidate.get_parameters(), + ParameterSpace.STEP_CANDIDATE: [space.estimator.candidate], + ParameterSpace.STEP_NAME: [space.estimator.candidate_name], + **space.get_parameters(), } - for candidate in self.candidates + for space in self.spaces ] def to_expression(self) -> "Expression": @@ -315,8 +319,8 @@ def to_expression(self) -> "Expression": return Id(type(self))( self.estimator.candidate, [ - candidate._to_expression(path_prefix=ParameterSpace.STEP_CANDIDATE) - for candidate in self.candidates + space._to_expression(path_prefix=ParameterSpace.STEP_CANDIDATE) + for space in self.spaces ], ) @@ -328,12 +332,12 @@ class MultiRegressorParameterSpace(MultiEstimatorParameterSpace[RegressorPipelin def __init__( self, - *candidates: ParameterSpace[RegressorPipelineDF], + *spaces: ParameterSpace[RegressorPipelineDF], estimator_type: Type[RegressorPipelineDF] = RegressorPipelineDF, ) -> None: """[see superclass]""" ensure_subclass(estimator_type, RegressorPipelineDF) - super().__init__(*candidates, estimator_type=estimator_type) + super().__init__(*spaces, estimator_type=estimator_type) @subsdoc(pattern="a competing estimator", replacement="a competing classifier pipeline") @@ -343,12 +347,12 @@ class MultiClassifierParameterSpace(MultiEstimatorParameterSpace[ClassifierPipel def __init__( self, - *candidates: ParameterSpace[ClassifierPipelineDF], + 
*spaces: ParameterSpace[ClassifierPipelineDF], estimator_type: Type[ClassifierPipelineDF] = ClassifierPipelineDF, ) -> None: """[see superclass]""" ensure_subclass(estimator_type, ClassifierPipelineDF) - super().__init__(*candidates, estimator_type=estimator_type) + super().__init__(*spaces, estimator_type=estimator_type) @inheritdoc(match="""[see superclass]""") @@ -360,17 +364,24 @@ class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): candidate where actual estimator is a hyperparameter itself. """ - def __init__(self, candidate: Optional[EstimatorDF] = None) -> None: + def __init__( + self, + candidate: Optional[EstimatorDF] = None, + candidate_name: Optional[str] = None, + ) -> None: """ - :param candidate: candidate estimator. If None then estimators to be evaluated - should be provided in the parameter grid under "candidate" key + :param candidate: the candidate estimator. If ``None`` then estimators to be + evaluated should be provided in the parameter grid under a + "candidate" key. + :param candidate_name: a name for the candidate """ self.candidate = candidate + self.candidate_name = candidate_name @classmethod def empty(cls) -> "CandidateEstimatorDF": """ - :return: new instance with an empty candidate + :return: new candidate instance without internal estimator """ return cls() @@ -428,7 +439,7 @@ def fit( @property def is_fitted(self) -> bool: """[see superclass]""" - return False if self.candidate is None else self.candidate.is_fitted + return self.candidate is not None and self.candidate.is_fitted def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame: """[see superclass]""" @@ -439,13 +450,13 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: return self.candidate.transform(X) def _get_features_in(self) -> pd.Index: - return self.candidate._get_features_in() + return self.candidate.feature_names_in_ def _get_n_outputs(self) -> int: - return self.candidate._get_n_outputs() + return self.candidate.n_outputs_ def _get_features_original(self) -> pd.Series: - return self.candidate._get_features_original() + return self.candidate.feature_names_original_ __tracker.validate() @@ -472,22 +483,22 @@ def ensure_subclass( ) -def validate_candidates( - candidates: Collection[ParameterSpace[T_Estimator]], +def validate_spaces( + spaces: Collection[ParameterSpace[T_Estimator]], expected_estimator_type: Type[T_Estimator], ) -> None: """ Ensure that all candidates implement a given estimator type. 
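[Editor's note] For reference, a minimal sketch of the named parameter space as it
stands after this patch; the pipeline and parameter values are illustrative only,
and `name` is the keyword introduced in this commit (it is renamed again in later
commits of this series):

    import scipy.stats
    from sklearndf.pipeline import RegressorPipelineDF
    from sklearndf.regression import RandomForestRegressorDF
    from facet.selection import ParameterSpace

    ps = ParameterSpace(
        RegressorPipelineDF(regressor=RandomForestRegressorDF(random_state=42)),
        name="rf_candidate",
    )
    # attribute access mirrors the wrapped pipeline's parameter hierarchy
    ps.regressor.max_depth = [3, 4, 5, 7, 10]
    ps.regressor.min_weight_fraction_leaf = scipy.stats.loguniform(0.01, 0.1)

    # parameter names come back prefixed with "candidate__", because the
    # space wraps its estimator in a CandidateEstimatorDF
    print(ps.get_parameters())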
- :param candidates: the candidates to check + :param spaces: the candidates to check :param expected_estimator_type: the type that all candidates' estimators must implement """ non_compliant_candidate_estimators: Set[str] = { - type(candidate.estimator.candidate).__name__ - for candidate in candidates - if not isinstance(candidate.estimator.candidate, expected_estimator_type) + type(space.estimator.candidate).__name__ + for space in spaces + if not isinstance(space.estimator.candidate, expected_estimator_type) } if non_compliant_candidate_estimators: raise TypeError( diff --git a/test/test/conftest.py b/test/test/conftest.py index aa528d06..953a6675 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -43,7 +43,7 @@ # configure pandas text output pd.set_option("display.width", None) # get display width from terminal -pd.set_option("precision", 3) # 3 digits precision for easier readability +pd.set_option("display.precision", 3) # 3 digits precision for easier readability K_FOLDS = 5 N_BOOTSTRAPS = 30 diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 06b2813f..2886435a 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -172,7 +172,8 @@ def test_parameter_space( regressor=RandomForestRegressorDF(random_state=42), preprocessing=simple_preprocessor, ) - ps_1 = ParameterSpace(pipeline_1) + ps_1_name = "rf_regressor" + ps_1 = ParameterSpace(pipeline_1, name=ps_1_name) ps_1.regressor.min_weight_fraction_leaf = loguniform_0_01_0_10 ps_1.regressor.max_depth = randint_3_10 ps_1.regressor.min_samples_leaf = loguniform_0_05_0_10 @@ -198,7 +199,8 @@ def test_parameter_space( regressor=LGBMRegressorDF(random_state=42), preprocessing=simple_preprocessor, ) - ps_2 = ParameterSpace(pipeline_2) + ps_2_name = "lgbm" + ps_2 = ParameterSpace(pipeline_2, name=ps_2_name) ps_2.regressor.max_depth = randint_3_10 ps_2.regressor.min_child_samples = zipfian_1_32 @@ -260,12 +262,14 @@ def test_parameter_space( assert mps.parameters == [ { "candidate": [pipeline_1], + "candidate_name": [ps_1_name], "candidate__regressor__max_depth": randint_3_10, "candidate__regressor__min_samples_leaf": loguniform_0_05_0_10, "candidate__regressor__min_weight_fraction_leaf": loguniform_0_01_0_10, }, { "candidate": [pipeline_2], + "candidate_name": [ps_2_name], "candidate__regressor__max_depth": randint_3_10, "candidate__regressor__min_child_samples": zipfian_1_32, }, From 4eef0a45e3506e55236241117e4b7d6ffe442120 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:14:40 +0100 Subject: [PATCH 064/106] API: expect EstimatorDF arguments for BaseParameterSpace --- src/facet/selection/base/_parameters.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index 3670fa14..f5a6f7ca 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -7,10 +7,10 @@ from typing import Any, Dict, Generic, List, Optional, TypeVar, Union from scipy import stats -from sklearn.base import BaseEstimator from pytools.api import AllTracker from pytools.expression import HasExpressionRepr +from sklearndf import EstimatorDF log = logging.getLogger(__name__) @@ -30,7 +30,7 @@ # Type variables # -T_Estimator = TypeVar("T_Estimator", bound=BaseEstimator) +T_Estimator = TypeVar("T_Estimator", bound=EstimatorDF) # @@ -51,11 +51,14 @@ class BaseParameterSpace(HasExpressionRepr, Generic[T_Estimator], metaclass=ABCM 
optimization.
     """
 
+    #: The estimator associated with this parameter space.
+    estimator: T_Estimator
+
     def __init__(self, estimator: T_Estimator) -> None:
         """
         :param estimator: the estimator for which to capture parameters
         """
-        self._estimator: BaseEstimator = estimator
+        self._estimator = estimator
 
     @property
     def estimator(self) -> T_Estimator:

From 64244074d1a64cc04a236c870bb772dae73d5feb Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 11:25:10 +0100
Subject: [PATCH 065/106] API: remove MultiRegressorParameterSpace,
 MultiClassifierParameterSpace

---
 src/facet/selection/_parameters.py | 33 ------------------------------
 1 file changed, 33 deletions(-)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index feffbc22..4ffde977 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -29,7 +29,6 @@
 from pytools.expression import Expression, make_expression
 from pytools.expression.atomic import Id
 from sklearndf import ClassifierDF, EstimatorDF, RegressorDF, TransformerDF
-from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
 
 from .base import BaseParameterSpace
 
@@ -37,9 +36,7 @@
 
 __all__ = [
     "CandidateEstimatorDF",
-    "MultiClassifierParameterSpace",
     "MultiEstimatorParameterSpace",
-    "MultiRegressorParameterSpace",
     "ParameterSpace",
 ]
 
@@ -325,36 +322,6 @@ def to_expression(self) -> "Expression":
         )
 
 
-@subsdoc(pattern="a competing estimator", replacement="a competing regressor pipeline")
-@inheritdoc(match="""[see superclass]""")
-class MultiRegressorParameterSpace(MultiEstimatorParameterSpace[RegressorPipelineDF]):
-    """[see superclass]"""
-
-    def __init__(
-        self,
-        *spaces: ParameterSpace[RegressorPipelineDF],
-        estimator_type: Type[RegressorPipelineDF] = RegressorPipelineDF,
-    ) -> None:
-        """[see superclass]"""
-        ensure_subclass(estimator_type, RegressorPipelineDF)
-        super().__init__(*spaces, estimator_type=estimator_type)
-
-
-@subsdoc(pattern="a competing estimator", replacement="a competing classifier pipeline")
-@inheritdoc(match="""[see superclass]""")
-class MultiClassifierParameterSpace(MultiEstimatorParameterSpace[ClassifierPipelineDF]):
-    """[see superclass]"""
-
-    def __init__(
-        self,
-        *spaces: ParameterSpace[ClassifierPipelineDF],
-        estimator_type: Type[ClassifierPipelineDF] = ClassifierPipelineDF,
-    ) -> None:
-        """[see superclass]"""
-        ensure_subclass(estimator_type, ClassifierPipelineDF)
-        super().__init__(*spaces, estimator_type=estimator_type)
-
-
 @inheritdoc(match="""[see superclass]""")

From 751a404d27bcb887e2bf45dfb884e0a6ed6faf6f Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 11:31:57 +0100
Subject: [PATCH 066/106] API: add T_Candidate_co; update
 (Multi)ParameterSpace class signatures

---
 src/facet/selection/_parameters.py | 69 ++++++++++++------------------
 1 file changed, 28 insertions(+), 41 deletions(-)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index 4ffde977..7d009a48 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -57,7 +57,7 @@
 #
 
 T_Self = TypeVar("T_Self")
-T_Estimator = TypeVar("T_Estimator", bound=BaseEstimator)
+T_Candidate_co = TypeVar("T_Candidate_co", covariant=True, bound=EstimatorDF)
 
 
 #
@@ -73,8 +73,7 @@
 
 
 @inheritdoc(match="""[see superclass]""")
-class ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]):
-    # noinspection SpellCheckingInspection
+class 
ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co]): """ A set of parameters spanning a parameter space for optimizing the hyper-parameters of a single estimator. @@ -119,22 +118,25 @@ class ParameterSpace(BaseParameterSpace[T_Estimator], Generic[T_Estimator]): STEP_CANDIDATE = "candidate" STEP_NAME = "candidate_name" - def __init__(self, estimator: T_Estimator, name: Optional[str] = None) -> None: + def __init__( + self, candidate: T_Candidate_co, candidate_name: Optional[str] = None + ) -> None: """ - :param estimator: the estimator to which to apply the parameters to - :param name: a name for the estimator to be used in the report data frame + :param candidate: the estimator candidate to which to apply the parameters to + :param candidate_name: a name for the estimator candidate to be used in summary + reports """ - super().__init__(estimator=CandidateEstimatorDF(estimator, name)) + super().__init__(estimator=CandidateEstimatorDF(candidate, candidate_name)) params: Dict[str, Any] = { name: param - for name, param in estimator.get_params(deep=True).items() + for name, param in candidate.get_params(deep=True).items() if "__" not in name } self._children: Dict[str, ParameterSpace] = { - name: ParameterSpace(estimator=value) + name: ParameterSpace(candidate=value) for name, value in params.items() if isinstance(value, BaseEstimator) } @@ -257,7 +259,7 @@ def _values_to_expression(values: ParameterSet) -> Expression: @inheritdoc(match="""[see superclass]""") class MultiEstimatorParameterSpace( - BaseParameterSpace[T_Estimator], Generic[T_Estimator] + BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co] ): """ A collection of parameter spaces, each representing a competing estimator from which @@ -267,21 +269,12 @@ class MultiEstimatorParameterSpace( spaces. """ - #: The estimator base type which all candidate estimators must implement. - estimator_type: Type[T_Estimator] - - def __init__( - self, - *spaces: ParameterSpace[T_Estimator], - estimator_type: Type[T_Estimator], - ) -> None: + def __init__(self, *spaces: ParameterSpace[T_Candidate_co]) -> None: """ :param spaces: the parameter spaces from which to select the best estimator - :param estimator_type: the estimator base type which all candidate estimators - must implement """ validate_element_types(spaces, expected_type=ParameterSpace) - validate_spaces(spaces, expected_estimator_type=estimator_type) + validate_spaces(spaces) if len(spaces) == 0: raise TypeError("no parameter space passed; need to pass at least one") @@ -289,7 +282,6 @@ def __init__( super().__init__(estimator=CandidateEstimatorDF.empty()) self.spaces = spaces - self.estimator_type = estimator_type @subsdoc( pattern=( @@ -323,7 +315,9 @@ def to_expression(self) -> "Expression": @inheritdoc(match="""[see superclass]""") -class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): +class CandidateEstimatorDF( + ClassifierDF, RegressorDF, TransformerDF, Generic[T_Candidate_co] +): """ Metaclass providing representation for candidate estimator to be used in hyperparameter search. 
Unifies evaluation approach for :class:`.ParameterSpace` @@ -333,7 +327,7 @@ class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): def __init__( self, - candidate: Optional[EstimatorDF] = None, + candidate: Optional[T_Candidate_co] = None, candidate_name: Optional[str] = None, ) -> None: """ @@ -435,7 +429,7 @@ def _get_features_original(self) -> pd.Series: def ensure_subclass( - estimator_type: Type[T_Estimator], expected_type: Type[T_Estimator] + estimator_type: Type[T_Candidate_co], expected_type: Type[T_Candidate_co] ) -> None: """ Ensure that the given estimator type is a subclass of the expected estimator type. @@ -450,27 +444,20 @@ def ensure_subclass( ) -def validate_spaces( - spaces: Collection[ParameterSpace[T_Estimator]], - expected_estimator_type: Type[T_Estimator], -) -> None: +def validate_spaces(spaces: Collection[ParameterSpace[T_Candidate_co]]) -> None: """ - Ensure that all candidates implement a given estimator type. + Ensure that all candidates implement the same estimator type (typically regressors + or classifiers) :param spaces: the candidates to check - :param expected_estimator_type: the type that all candidates' estimators must - implement """ - non_compliant_candidate_estimators: Set[str] = { - type(space.estimator.candidate).__name__ - for space in spaces - if not isinstance(space.estimator.candidate, expected_estimator_type) + estimator_types = { + getattr(space.estimator, "_estimator_type", None) for space in spaces } - if non_compliant_candidate_estimators: + + if len(estimator_types) > 1: raise TypeError( - f"all candidate estimators must be instances of " - f"{expected_estimator_type.__name__}, " - f"but candidate estimators include: " - f"{', '.join(non_compliant_candidate_estimators)}" + "all candidate estimators must have the same estimator type, " + "but got multiple types: " + ", ".join(sorted(estimator_types)) ) From 8452f02062d4e93f6e5d3bdde87f8b755a20e5d8 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:37:51 +0100 Subject: [PATCH 067/106] =?UTF-8?q?API:=20add=20PARAM=5FCANDIDATE=E2=80=A6?= =?UTF-8?q?=20constants=20to=20CandidateEstimatorDF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/facet/selection/_parameters.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 7d009a48..5c68f006 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -148,12 +148,16 @@ def __init__( replacement="", using=BaseParameterSpace.get_parameters, ) - def get_parameters(self, prefix: Optional[str] = STEP_CANDIDATE) -> ParameterDict: + def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict: """[see superclass]""" return { "__".join(name): values - for (name, values) in self._iter_parameters([prefix] if prefix else []) + for (name, values) in self._iter_parameters( + path_prefix=[ + CandidateEstimatorDF.PARAM_CANDIDATE if prefix is None else prefix + ] + ) } def _validate_parameter(self, name: str, value: ParameterSet) -> None: @@ -295,8 +299,10 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" return [ { - ParameterSpace.STEP_CANDIDATE: [space.estimator.candidate], - ParameterSpace.STEP_NAME: [space.estimator.candidate_name], + CandidateEstimatorDF.PARAM_CANDIDATE: [space.estimator.candidate], + CandidateEstimatorDF.PARAM_CANDIDATE_NAME: [ + 
space.estimator.candidate_name + ], **space.get_parameters(), } for space in self.spaces @@ -308,7 +314,7 @@ def to_expression(self) -> "Expression": return Id(type(self))( self.estimator.candidate, [ - space._to_expression(path_prefix=ParameterSpace.STEP_CANDIDATE) + space._to_expression(path_prefix=CandidateEstimatorDF.PARAM_CANDIDATE) for space in self.spaces ], ) @@ -325,6 +331,12 @@ class CandidateEstimatorDF( candidate where actual estimator is a hyperparameter itself. """ + #: name of the `candidate` parameter + PARAM_CANDIDATE = "candidate" + + #: name of the `candidate_name` parameter + PARAM_CANDIDATE_NAME = "candidate_name" + def __init__( self, candidate: Optional[T_Candidate_co] = None, From bce28b84f5a9c4deacd33223bc55a4f5d3517f25 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:38:33 +0100 Subject: [PATCH 068/106] API: add documentation for CandidateEstimatorDF attributes --- src/facet/selection/_parameters.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 5c68f006..e05e1f1b 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -337,6 +337,12 @@ class CandidateEstimatorDF( #: name of the `candidate_name` parameter PARAM_CANDIDATE_NAME = "candidate_name" + #: The currently selected estimator candidate + candidate: T_Candidate_co + + #: The name of the candidate + candidate_name: str + def __init__( self, candidate: Optional[T_Candidate_co] = None, From dc3ebe0e4423e59c4a2b88e865f87b443d0b2b0a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:38:58 +0100 Subject: [PATCH 069/106] API: add call to super().__init__() --- src/facet/selection/_parameters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index e05e1f1b..6c1b0079 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -354,6 +354,8 @@ def __init__( "candidate" key. 
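[Editor's note] A sketch of how the PARAM_CANDIDATE… constants surface in the
search grid at this point in the series; the two pipelines are illustrative, and
the import path of LGBMRegressorDF (sklearndf.regression.extra) is an assumption:

    from sklearndf.pipeline import RegressorPipelineDF
    from sklearndf.regression import RandomForestRegressorDF
    from sklearndf.regression.extra import LGBMRegressorDF
    from facet.selection import MultiEstimatorParameterSpace, ParameterSpace

    ps_rf = ParameterSpace(
        RegressorPipelineDF(regressor=RandomForestRegressorDF()),
        candidate_name="rf",
    )
    ps_rf.regressor.max_depth = [3, 5, 10]

    ps_lgbm = ParameterSpace(
        RegressorPipelineDF(regressor=LGBMRegressorDF()),
        candidate_name="lgbm",
    )
    ps_lgbm.regressor.max_depth = [3, 5, 10]

    mps = MultiEstimatorParameterSpace(ps_rf, ps_lgbm)
    # one dict per candidate; "candidate" and "candidate_name" are
    # single-element lists, so the searcher treats them as fixed per grid
    for grid in mps.get_parameters():
        print(sorted(grid))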
:param candidate_name: a name for the candidate
         """
+        super().__init__()
+
         self.candidate = candidate
         self.candidate_name = candidate_name
 

From 2b8311a6128460abb29eea831aef378d59356280 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 11:40:06 +0100
Subject: [PATCH 070/106] API: suppress PEP warnings for 'X' arg name

---
 src/facet/selection/_parameters.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index 6c1b0079..b8f3025f 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -371,42 +371,49 @@ def classes_(self) -> Sequence[Any]:
         """[see superclass]"""
         return self.candidate.classes_
 
+    # noinspection PyPep8Naming
     def predict_proba(
         self, X: pd.DataFrame, **predict_params: Any
     ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
         """[see superclass]"""
         return self.candidate.predict_proba(X, **predict_params)
 
+    # noinspection PyPep8Naming
     def predict_log_proba(
         self, X: pd.DataFrame, **predict_params: Any
     ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
         """[see superclass]"""
         return self.candidate.predict_log_proba(X, **predict_params)
 
+    # noinspection PyPep8Naming
     def decision_function(
         self, X: pd.DataFrame, **predict_params: Any
     ) -> Union[pd.Series, pd.DataFrame]:
         """[see superclass]"""
         return self.candidate.decision_function(X, **predict_params)
 
+    # noinspection PyPep8Naming
     def score(
         self, X: pd.DataFrame, y: pd.Series, sample_weight: Optional[pd.Series] = None
     ) -> float:
         """[see superclass]"""
         return self.candidate.score(X, y, sample_weight)
 
+    # noinspection PyPep8Naming
     def predict(
         self, X: pd.DataFrame, **predict_params: Any
     ) -> Union[pd.Series, pd.DataFrame]:
         """[see superclass]"""
         return self.candidate.predict(X, **predict_params)
 
+    # noinspection PyPep8Naming
     def fit_predict(
         self, X: pd.DataFrame, y: pd.Series, **fit_params: Any
     ) -> Union[pd.Series, pd.DataFrame]:
         """[see superclass]"""
         return self.candidate.fit_predict(X, y, **fit_params)
 
+    # noinspection PyPep8Naming
     def fit(
         self: T_Self,
         X: pd.DataFrame,
@@ -422,10 +429,12 @@ def is_fitted(self) -> bool:
         """[see superclass]"""
         return self.candidate is not None and self.candidate.is_fitted
 
+    # noinspection PyPep8Naming
     def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """[see superclass]"""
         return self.candidate.inverse_transform(X)
 
+    # noinspection PyPep8Naming
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """[see superclass]"""
         return self.candidate.transform(X)

From 91429faf7ece1b60c561109c9a9c7e28380dfcdd Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 11:40:55 +0100
Subject: [PATCH 071/106] API: add property CandidateEstimatorDF._estimator_type

---
 src/facet/selection/_parameters.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index b8f3025f..a6b5c68a 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -439,6 +439,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         """[see superclass]"""
         return self.candidate.transform(X)
 
+    @property
+    def _estimator_type(self) -> str:
+        # noinspection PyProtectedMember
+        return self.candidate._estimator_type
+
     def _get_features_in(self) -> pd.Index:
         return self.candidate.feature_names_in_
 

From be4d3b7a5d37fabd7e4d8c2f7b78936aab9c4d06 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 11:41:13 +0100
Subject: [PATCH 072/106] DOC: tweak API docstring

---
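[Editor's note] The `_estimator_type` property added in the preceding patch is what
lets scikit-learn's introspection helpers treat the candidate wrapper like the
estimator it wraps; a quick, hedged illustration (assuming facet and sklearndf are
installed):

    from sklearn.base import is_classifier, is_regressor
    from sklearndf.classification import RandomForestClassifierDF
    from facet.selection import CandidateEstimatorDF

    candidate = CandidateEstimatorDF(RandomForestClassifierDF())
    # both helpers read the _estimator_type attribute, which now delegates
    # to the wrapped estimator
    assert is_classifier(candidate)
    assert not is_regressor(candidate)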
src/facet/selection/_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index a6b5c68a..82f516d3 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -106,7 +106,7 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co] # ... ) - # the following will raise an AttributeError for unknown attribute xyz: + # The following will raise an AttributeError for the unknown attribute xyz: ps.regressor.xyz = [3, 4, 5, 7, 10] # the following will raise a TypeError because we do not assign a list or \ From 07eba48127c62f8a77f7a91d5e2998274b3a167a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:44:38 +0100 Subject: [PATCH 073/106] API: streamline/improve handling of estimator scoring in LearnerRanker --- src/facet/selection/_selection.py | 63 +++++++++++++++++++------------ 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index c72b9ce7..3a665da8 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -21,12 +21,13 @@ import numpy as np import pandas as pd from numpy.random.mtrand import RandomState -from sklearn.metrics import check_scoring +from sklearn.metrics import get_scorer from sklearn.model_selection import BaseCrossValidator, GridSearchCV from pytools.api import AllTracker, inheritdoc from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin +from sklearndf import EstimatorDF from sklearndf.pipeline import ( ClassifierPipelineDF, LearnerPipelineDF, @@ -34,6 +35,7 @@ ) from facet.data import Sample +from facet.selection import CandidateEstimatorDF from facet.selection.base import BaseParameterSpace log = logging.getLogger(__name__) @@ -115,7 +117,14 @@ def __init__( parameter_space: BaseParameterSpace, *, cv: Optional[BaseCrossValidator] = None, - scoring: Union[str, Callable[[float, float], float], None] = None, + scoring: Union[ + str, + Callable[ + [EstimatorDF, pd.Series, pd.Series], + float, + ], + None, + ] = None, random_state: Union[int, RandomState, None] = None, n_jobs: Optional[int] = None, shared_memory: Optional[bool] = None, @@ -185,9 +194,6 @@ def __init__( + ", ".join(unsupported_params) ) - if type(self.scoring) == str: - self.scoring = self._preprocess_scoring(self.scoring) - self.searcher_ = None __init__.__doc__ = __init__.__doc__.replace( @@ -199,25 +205,6 @@ def is_fitted(self) -> bool: """[see superclass]""" return self.searcher_ is not None - @staticmethod - def _preprocess_scoring(scoring: str): - def _score_fn(estimator, X: pd.DataFrame, y: pd.Series): - estimator = estimator.candidate - - if isinstance(estimator, LearnerPipelineDF): - if estimator.preprocessing: - X = estimator.preprocessing.transform(X=X) - estimator = estimator.final_estimator - - scorer = check_scoring( - estimator=estimator.native_estimator, - scoring=scoring, - ) - - return scorer(estimator.native_estimator, X.values, y.values) - - return _score_fn - @property def best_estimator_(self) -> T_LearnerPipelineDF: """ @@ -350,7 +337,7 @@ def _get_searcher_parameters(self) -> Dict[str, Any]: k: v for k, v in dict( cv=self.cv, - scoring=self.scoring, + scoring=self._get_scorer(), random_state=self.random_state, n_jobs=self.n_jobs, shared_memory=self.shared_memory, @@ -362,6 +349,32 @@ def _get_searcher_parameters(self) -> Dict[str, Any]: **self.searcher_params, } + def 
_get_scorer( + self, + ) -> Optional[Callable[[CandidateEstimatorDF, pd.DataFrame, pd.Series], float]]: + scoring = self.scoring + + if scoring is None: + return None + + elif isinstance(scoring, str): + scorer = get_scorer(scoring) + + # noinspection PyPep8Naming + def _scorer_fn( + estimator: CandidateEstimatorDF, X: pd.DataFrame, y: pd.Series + ) -> float: + candidate = estimator.candidate + + if isinstance(candidate, LearnerPipelineDF): + if candidate.preprocessing: + X = candidate.preprocessing.transform(X=X) + candidate = candidate.final_estimator + + return scorer(candidate.native_estimator, X, y) + + return _scorer_fn + summary_report.__doc__ = summary_report.__doc__.replace( "%%SORT_COLUMN%%", _DEFAULT_REPORT_SORT_COLUMN ) From 83de5a6fdbca311e4482c827abf796bac4f03768 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:45:57 +0100 Subject: [PATCH 074/106] FIX: typo in exception message --- src/facet/selection/_selection.py | 2 +- test/test/facet/test_crossfit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 3a665da8..5199284e 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -245,7 +245,7 @@ def fit( if ARG_SAMPLE_WEIGHT in fit_params: raise ValueError( - "arg sample_weight is not supported, use ag sample.weight instead" + "arg sample_weight is not supported, use arg sample.weight instead" ) if isinstance(groups, pd.Series): diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index dadef120..53b29bcb 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -54,7 +54,7 @@ def test_prediction_classifier( with pytest.raises( ValueError, - match="arg sample_weight is not supported, " "use ag sample.weight instead", + match="arg sample_weight is not supported, use arg sample.weight instead", ): model_ranker.fit( sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight From 7821c24100da7a55b5fb9e4ad42f888076c7e4c3 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 2 Feb 2022 11:46:20 +0100 Subject: [PATCH 075/106] TEST: update unit tests for API changes --- test/test/conftest.py | 13 +++++----- test/test/facet/test_crossfit.py | 11 +++++---- test/test/facet/test_selection.py | 41 ++++++++++++------------------- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index 953a6675..878493c5 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -1,5 +1,5 @@ import logging -from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, Type import numpy as np import pandas as pd @@ -29,7 +29,7 @@ import facet from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory -from facet.selection import LearnerRanker, MultiRegressorParameterSpace, ParameterSpace +from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace from facet.validation import BootstrapCV, StratifiedBootstrapCV logging.basicConfig(level=logging.DEBUG) @@ -88,7 +88,7 @@ def cv_stratified_bootstrap() -> BaseCrossValidator: @pytest.fixture def regressor_parameters( simple_preprocessor: TransformerDF, -) -> MultiRegressorParameterSpace: +) -> MultiEstimatorParameterSpace[RegressorPipelineDF]: random_state = {"random_state": 42} space_1 = ParameterSpace( @@ -146,7 +146,7 @@ def 
regressor_parameters( ) space_7.regressor.normalize = [False, True] - return MultiRegressorParameterSpace( + return MultiEstimatorParameterSpace( space_1, space_2, space_3, @@ -154,14 +154,13 @@ def regressor_parameters( space_5, space_6, space_7, - estimator_type=RegressorPipelineDF, ) @pytest.fixture def regressor_ranker( cv_kfold: KFold, - regressor_parameters: MultiRegressorParameterSpace, + regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, n_jobs: int, ) -> LearnerRanker[RegressorPipelineDF, GridSearchCV]: @@ -324,7 +323,7 @@ def check_ranking( is_classifier: bool, expected_scores: Sequence[float], expected_parameters: Optional[Mapping[int, Mapping[str, Any]]], - expected_learners: Optional[List[LearnerDF]] = None, + expected_learners: Optional[List[Type[LearnerDF]]] = None, ) -> None: """ Test helper to check rankings produced by learner rankers diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index 53b29bcb..dd619410 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -8,7 +8,7 @@ from sklearndf.regression import RandomForestRegressorDF from ..conftest import check_ranking -from facet.selection import LearnerRanker, MultiClassifierParameterSpace, ParameterSpace +from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace from facet.validation import StratifiedBootstrapCV log = logging.getLogger(__name__) @@ -35,12 +35,13 @@ def test_prediction_classifier( with pytest.raises( TypeError, - match="^all candidate estimators must be instances of " - "ClassifierPipelineDF, but candidate estimators include: " - "RegressorPipelineDF$", + match=( + "^all candidate estimators must have the same estimator type, " + "but got multiple types: classifier, regressor$" + ), ): # define an illegal grid list, mixing classification with regression - MultiClassifierParameterSpace(ps1, ps2) + MultiEstimatorParameterSpace(ps1, ps2) model_ranker: LearnerRanker[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 2886435a..610c9767 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -25,19 +25,16 @@ from ..conftest import check_ranking from facet.data import Sample -from facet.selection import ( - LearnerRanker, - MultiClassifierParameterSpace, - MultiRegressorParameterSpace, - ParameterSpace, -) +from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace from facet.validation import BootstrapCV log = logging.getLogger(__name__) def test_model_ranker( - regressor_parameters: MultiRegressorParameterSpace, sample: Sample, n_jobs: int + regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], + sample: Sample, + n_jobs: int, ) -> None: expected_scores = [ @@ -173,7 +170,7 @@ def test_parameter_space( preprocessing=simple_preprocessor, ) ps_1_name = "rf_regressor" - ps_1 = ParameterSpace(pipeline_1, name=ps_1_name) + ps_1 = ParameterSpace(pipeline_1, candidate_name=ps_1_name) ps_1.regressor.min_weight_fraction_leaf = loguniform_0_01_0_10 ps_1.regressor.max_depth = randint_3_10 ps_1.regressor.min_samples_leaf = loguniform_0_05_0_10 @@ -200,7 +197,7 @@ def test_parameter_space( preprocessing=simple_preprocessor, ) ps_2_name = "lgbm" - ps_2 = ParameterSpace(pipeline_2, name=ps_2_name) + ps_2 = ParameterSpace(pipeline_2, candidate_name=ps_2_name) ps_2.regressor.max_depth = randint_3_10 
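[Editor's note] The fixtures here rely on ParameterSpace accepting either a list of
explicit choices or a frozen scipy distribution; a short sketch of the accepted
forms, with illustrative distribution arguments (ps_2 as defined in the
surrounding test):

    import scipy.stats

    ps_2.regressor.max_depth = [3, 5, 7]                   # explicit grid choices
    ps_2.regressor.max_depth = scipy.stats.randint(3, 10)  # randomized search
    ps_2.regressor.min_child_samples = scipy.stats.zipfian(1.0, 32)
    # any other value raises a TypeError during parameter validation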
ps_2.regressor.min_child_samples = zipfian_1_32
 
@@ -209,29 +206,21 @@ def test_parameter_space(
     with pytest.raises(
         TypeError,
         match=(
-            r"^arg estimator_type must be a subclass of ClassifierPipelineDF but is: "
-            r"RegressorPipelineDF$"
+            r"^all candidate estimators must have the same estimator type, "
+            r"but got multiple types: classifier, regressor$"
         ),
     ):
         # noinspection PyTypeChecker
-        MultiClassifierParameterSpace(ps_1, ps_2, estimator_type=RegressorPipelineDF)
-
-    with pytest.raises(
-        TypeError,
-        match=(
-            r"^all candidate estimators must be instances of ClassifierPipelineDF, "
-            r"but candidate estimators include: RegressorPipelineDF$"
-        ),
-    ):
-        # noinspection PyTypeChecker
-        MultiClassifierParameterSpace(ps_1, ps_2)
+        MultiEstimatorParameterSpace(
+            ps_1, ps_2, ParameterSpace(ClassifierPipelineDF(classifier=SVCDF()))
+        )
 
-    mps = MultiRegressorParameterSpace(ps_1, ps_2)
+    mps = MultiEstimatorParameterSpace(ps_1, ps_2)
 
     # test
 
     assert freeze(mps.to_expression()) == freeze(
-        Id.MultiRegressorParameterSpace(
+        Id.MultiEstimatorParameterSpace(

From f1709e324194a9d9e0b0d45ee3c1ae5271c7d391 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 2 Feb 2022 12:15:52 +0100
Subject: [PATCH 076/106]
 =?UTF-8?q?API:=20remove=20obsolete=20constants=20?=
 =?UTF-8?q?ParameterSpace.STEP=5F=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/facet/selection/_parameters.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index 82f516d3..cdcd9ca4 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -115,9 +115,6 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co]
 
     """
 
-    STEP_CANDIDATE = "candidate"
-    STEP_NAME = "candidate_name"
-

From 132f0988cd3035a724bf058c7a2ad57ee48d5f19 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Thu, 3 Feb 2022 12:13:29 +0100
Subject: [PATCH 077/106] API: don't use CandidateEstimatorDF in
 ParameterSpace; add auto-naming

---
 src/facet/selection/_parameters.py |  99 ++++++++++++++++-------
 src/facet/selection/_selection.py  | 124 +++++++++++++++++++++--------
 2 files changed, 160 insertions(+), 63 deletions(-)

diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py
index cdcd9ca4..081bae82 100644
--- a/src/facet/selection/_parameters.py
+++ b/src/facet/selection/_parameters.py
@@ -29,6 +29,7 @@
 from pytools.expression import Expression, make_expression
 from pytools.expression.atomic import Id
 from sklearndf import ClassifierDF, EstimatorDF, RegressorDF, TransformerDF
+from sklearndf.pipeline import LearnerPipelineDF, PipelineDF
 
 from .base import BaseParameterSpace
 
@@ -115,31 +116,48 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co]
 
     """
 
-    def __init__(
-        self, candidate: T_Candidate_co, candidate_name: Optional[str] = None
-    ) -> None:
+    def __init__(self, estimator: T_Candidate_co, name: Optional[str] = None) -> None:
         """
-        :param candidate: the 
estimator candidate to which to apply the parameters to - :param candidate_name: a name for the estimator candidate to be used in summary - reports + :param estimator: the estimator candidate to which to apply the parameters to + :param name: a name for the estimator candidate to be used in summary reports """ - super().__init__(estimator=CandidateEstimatorDF(candidate, candidate_name)) + super().__init__(estimator=estimator) params: Dict[str, Any] = { name: param - for name, param in candidate.get_params(deep=True).items() + for name, param in estimator.get_params(deep=True).items() if "__" not in name } self._children: Dict[str, ParameterSpace] = { - name: ParameterSpace(candidate=value) + name: ParameterSpace(estimator=value) for name, value in params.items() if isinstance(value, BaseEstimator) } + + self._name = name self._values: ParameterDict = {} self._params: Set[str] = set(params.keys()) + def get_name(self) -> str: + """ + Get the name for this parameter space. + + If no name was passed to the constructor, determine the default name as follows: + + - for meta-estimators, this is the default name of the delegate estimator + - for pipelines, this is the default name of the final estimator + - for all other estimators, this is the name of the estimator's type + + :return: the name for this parameter space + """ + + if self._name is None: + return get_default_estimator_name(self._estimator) + else: + return self._name + @subsdoc( pattern="or a list of such dictionaries, ", replacement="", @@ -151,9 +169,7 @@ def get_parameters(self, prefix: Optional[str] = None) -> ParameterDict: return { "__".join(name): values for (name, values) in self._iter_parameters( - path_prefix=[ - CandidateEstimatorDF.PARAM_CANDIDATE if prefix is None else prefix - ] + path_prefix=[] if prefix is None else [prefix] ) } @@ -162,7 +178,7 @@ def _validate_parameter(self, name: str, value: ParameterSet) -> None: if name not in self._params: raise AttributeError( f"unknown parameter name for " - f"{type(self.estimator.candidate).__name__}: {name}" + f"{type(self.estimator).__name__}: {name}" ) if not ( @@ -252,10 +268,10 @@ def _values_to_expression(values: ParameterSet) -> Expression: if path_prefix: return Id(type(self))( - **{".".join(path_prefix): self.estimator.candidate}, **parameters + **{".".join(path_prefix): self.estimator}, **parameters ) else: - return Id(type(self))(self.estimator.candidate, **parameters) + return Id(type(self))(self.estimator, **parameters) @inheritdoc(match="""[see superclass]""") @@ -296,11 +312,9 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" return [ { - CandidateEstimatorDF.PARAM_CANDIDATE: [space.estimator.candidate], - CandidateEstimatorDF.PARAM_CANDIDATE_NAME: [ - space.estimator.candidate_name - ], - **space.get_parameters(), + CandidateEstimatorDF.PARAM_CANDIDATE: [space.estimator], + CandidateEstimatorDF.PARAM_CANDIDATE_NAME: [space.get_name()], + **space.get_parameters(prefix=CandidateEstimatorDF.PARAM_CANDIDATE), } for space in self.spaces ] @@ -308,18 +322,15 @@ def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: def to_expression(self) -> "Expression": """[see superclass]""" # noinspection PyProtectedMember - return Id(type(self))( - self.estimator.candidate, - [ - space._to_expression(path_prefix=CandidateEstimatorDF.PARAM_CANDIDATE) - for space in self.spaces - ], - ) + return Id(type(self))(*self.spaces) @inheritdoc(match="""[see superclass]""") class CandidateEstimatorDF( - 
ClassifierDF, RegressorDF, TransformerDF, Generic[T_Candidate_co] + ClassifierDF, + RegressorDF, + TransformerDF, + Generic[T_Candidate_co], ): """ Metaclass providing representation for candidate estimator to be used in @@ -359,7 +370,9 @@ def __init__( @classmethod def empty(cls) -> "CandidateEstimatorDF": """ - :return: new candidate instance without internal estimator + Create a new candidate estimator with no candidate set. + + :return: the new candidate estimator """ return cls() @@ -492,3 +505,31 @@ def validate_spaces(spaces: Collection[ParameterSpace[T_Candidate_co]]) -> None: "all candidate estimators must have the same estimator type, " "but got multiple types: " + ", ".join(sorted(estimator_types)) ) + + +def get_default_estimator_name(estimator: EstimatorDF) -> str: + """ + Get a default name of the estimator. + + For meta-estimators, this is the default name of the delegate estimator. + + For pipelines, this is the default name of the final estimator. + + For all other estimators, this is the name of the estimator's type. + + :param estimator: the estimator to get the default name for + :return: the default name + """ + + while True: + if isinstance(estimator, CandidateEstimatorDF): + estimator = estimator.candidate + + elif isinstance(estimator, PipelineDF) and estimator.steps: + estimator = estimator.steps[-1] + + elif isinstance(estimator, LearnerPipelineDF): + estimator = estimator.final_estimator + + else: + return type(estimator).__name__ diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 5199284e..b1d862fe 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -14,6 +14,7 @@ List, Optional, Sequence, + Tuple, TypeVar, Union, ) @@ -101,14 +102,22 @@ class LearnerRanker( searcher_: Optional[T_SearchCV] _CV_RESULT_COLUMNS = [ - r"mean_test_\w+", - r"std_test_\w+", - r"param_\w+", - r"(rank|mean|std)_\w+", + (r"rank_test_(\w+)", r"\1__test__rank"), + (r"(mean|std)_test_(\w+)", r"\2__test__\1"), + (r"param_(\w+)", r"param__\1"), + (r"(rank|mean|std)_(\w+)_time", r"time__\2__\1"), + (r"(rank|mean|std)_(\w+)_(\w+)", r"\3__\2__\1"), ] + _CV_RESULT_CANDIDATE_PATTERN = re.compile( + r"^(?:(param__)candidate__|param__(candidate(?:_name)?)$)" + ) + _CV_RESULT_CANDIDATE_REPL = r"\1\2" + # noinspection PyTypeChecker - _CV_RESULT_PATTERNS: List[Pattern] = list(map(re.compile, _CV_RESULT_COLUMNS)) + _CV_RESULT_PATTERNS: List[Tuple[Pattern, str]] = [ + (re.compile(pattern), repl) for pattern, repl in _CV_RESULT_COLUMNS + ] _DEFAULT_REPORT_SORT_COLUMN = "rank_test_score" def __init__( @@ -212,11 +221,17 @@ def best_estimator_(self) -> T_LearnerPipelineDF: """ self._ensure_fitted() searcher = self.searcher_ + if searcher.refit: - return searcher.best_estimator_.candidate + best_estimator = searcher.best_estimator_ + while isinstance(best_estimator, CandidateEstimatorDF): + # unpack the candidate estimator + best_estimator = best_estimator.candidate + return best_estimator + else: raise AttributeError( - "best_model_ is not defined; use a CV searcher with refit=True" + "best_estimator_ is not defined; use a CV searcher with refit=True" ) def fit( @@ -293,37 +308,78 @@ def summary_report(self, *, sort_by: Optional[str] = None) -> pd.DataFrame: # we create a table using a subset of the cv results, to keep the report # relevant and readable - cv_results_subset: Dict[str, np.ndarray] = {} + cv_results_processed: Dict[str, np.ndarray] = {} - # add the sorting column as the leftmost column of the report - sort_results = 
sort_by in cv_results
-        if sort_results:
-            cv_results_subset[sort_by] = cv_results[sort_by]
+        unpack_candidate: bool = isinstance(
+            self.parameter_space.estimator, CandidateEstimatorDF
+        )
+
+        def _process(name: str) -> Optional[str]:
+            # process the name of the original cv_results_ record
+            # to achieve a better table format
+
+            match = pattern.fullmatch(name)
+            if match is None:
+                # we could not match the name:
+                # return None so we don't include it in the summary report
+                return None
+
+            name = match.expand(repl)
+            if unpack_candidate:
+                # remove the "candidate" layer in the parameter output if we're dealing
+                # with a multi parameter space
+                return LearnerRanker._CV_RESULT_CANDIDATE_PATTERN.sub(
+                    LearnerRanker._CV_RESULT_CANDIDATE_REPL, name
+                )
+            else:
+                return name
 
-        # add all other columns that match any of the pre-defined patterns
-        for pattern in self._CV_RESULT_PATTERNS:
-            cv_results_subset.update(
+        # add all columns that match any of the pre-defined patterns
+        for pattern, repl in self._CV_RESULT_PATTERNS:
+            cv_results_processed.update(
                 {
-                    name: values
-                    for name, values in cv_results.items()
-                    if name not in cv_results_subset and pattern.fullmatch(name)
+                    name: (name_processed, values)
+                    for name, name_processed, values in (
+                        # iterate matches between pattern and name
+                        (name, _process(name), values)
+                        for name, values in cv_results.items()
+                        if name not in cv_results_processed
+                    )
+                    if name_processed is not None
                 }
             )
 
+        # add the sorting column as the leftmost column of the report
+        sort_column_processed: Optional[str]
+
+        sort_column_processed, _ = cv_results_processed.get(sort_by, (None, None))
+        if sort_column_processed is None:
+            sort_column_values = cv_results.get(sort_by, None)
+            if sort_column_values is None:
+                sort_column_processed = None
+            else:
+                sort_column_processed = sort_by
+                cv_results_processed[sort_by] = (sort_by, sort_column_values)
+
         # convert the results into a data frame and sort
-        report = pd.DataFrame(cv_results_subset)
+        report = pd.DataFrame(
+            {
+                name_processed: values
+                for name_processed, values in cv_results_processed.values()
+            }
+        )
+
+        # sort the report, if applicable
+        if sort_column_processed is not None:
+            report = report.sort_values(by=sort_column_processed)
 
         # split column headers containing one or more "__",
         # resulting in a column MultiIndex
         report.columns = report.columns.str.split("__", expand=True).map(
-            lambda column: tuple(level if pd.notna(level) else "" for level in column)
+            lambda column: tuple(level if pd.notna(level) else "-" for level in column)
        )
 
-        # sort the report, if applicable
-        if sort_results:
-            report = report.sort_values(by=sort_by)
-
         return report
 
     def _reset_fit(self) -> None:
@@ -351,7 +407,7 @@ def _get_searcher_parameters(self) -> Dict[str, Any]:
 
     def _get_scorer(
         self,
-    ) -> Optional[Callable[[CandidateEstimatorDF, pd.DataFrame, pd.Series], float]]:
+    ) -> Optional[Callable[[EstimatorDF, pd.DataFrame, pd.Series], float]]:
         scoring = self.scoring
 
         if scoring is None:
@@ -359,19 +415,19 @@ def _get_scorer(
 
         elif isinstance(scoring, str):
             scorer = get_scorer(scoring)
 
             # noinspection PyPep8Naming
-        def _scorer_fn(
-            estimator: CandidateEstimatorDF, X: pd.DataFrame, y: pd.Series
-        ) -> float:
-            candidate = estimator.candidate
+        def _scorer_fn(estimator: EstimatorDF, X: pd.DataFrame, y: pd.Series) -> float:
+            while isinstance(estimator, CandidateEstimatorDF):
+                estimator = estimator.candidate
 
-            if isinstance(candidate, LearnerPipelineDF):
-                if candidate.preprocessing:
-                    X = 
candidate.preprocessing.transform(X=X) - candidate = candidate.final_estimator + if isinstance(estimator, LearnerPipelineDF): + if estimator.preprocessing: + X = estimator.preprocessing.transform(X=X) + estimator = estimator.final_estimator - return scorer(candidate.native_estimator, X, y) + return scorer(estimator.native_estimator, X, y) return _scorer_fn From 46ca0638b88d34211279ce79a634df89ba7e1706 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:20:53 +0100 Subject: [PATCH 078/106] API: add type validation for LearnerRanker parameter searcher_factory --- src/facet/selection/_selection.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index b1d862fe..b8e72840 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -177,6 +177,12 @@ def __init__( # validate parameters for the searcher factory # + if not callable(searcher_factory): + raise TypeError( + "arg searcher_factory expected to be a callable, " + f"but is a {type(searcher_factory).__name__}" + ) + searcher_factory_params = inspect.signature(searcher_factory).parameters.keys() # raise an error if the searcher params include the searcher's first two From d32f0ebad6b3562852a87d80a6feb0760165c97a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:21:08 +0100 Subject: [PATCH 079/106] DOC: tweak a docstring --- src/facet/selection/_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index b8e72840..e1df9a60 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -157,7 +157,7 @@ def __init__( :param searcher_params: additional parameters to be passed on to the searcher; must not include the first two positional arguments of the searcher constructor used to pass the estimator and the search space, since these - will be populated using arg parameter_space + will be populated using arg ``parameter_space`` """ super().__init__( n_jobs=n_jobs, From b6dfee1a36d922ad290c770e2d1d39108f1fb433 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:22:50 +0100 Subject: [PATCH 080/106] TEST: streamline and update unit tests --- test/test/conftest.py | 126 ++++++++++++++++------------- test/test/facet/test_crossfit.py | 6 +- test/test/facet/test_inspection.py | 10 +-- test/test/facet/test_selection.py | 99 +++++++++++++---------- 4 files changed, 132 insertions(+), 109 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index 878493c5..320e57ce 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -1,14 +1,15 @@ import logging -from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, Type +from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple import numpy as np import pandas as pd import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from sklearn import datasets from sklearn.model_selection import BaseCrossValidator, GridSearchCV, KFold from sklearn.utils import Bunch -from sklearndf import LearnerDF, TransformerDF +from sklearndf import TransformerDF from sklearndf.classification import RandomForestClassifierDF from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import ( @@ -173,6 +174,9 @@ def regressor_ranker( ).fit(sample=sample) +PARAM_CANDIDATE__ = "param_candidate__" + + @pytest.fixture def best_lgbm_model( 
regressor_ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV], @@ -180,19 +184,27 @@ def best_lgbm_model( ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test # performance - candidates = regressor_ranker.summary_report()["param_candidate"] - best_lgbm_model_df = candidates[ - candidates.apply( - lambda x: isinstance(x.iloc[0].regressor, LGBMRegressorDF), axis=1 + best_lgbm_params: Dict[str, Any] = ( + pd.DataFrame(regressor_ranker.searcher_.cv_results_) + .pipe( + lambda df: df.loc[df.loc[:, "param_candidate_name"] == "LGBMRegressorDF", :] ) - ].iloc[0] - - best_lgbm_model = best_lgbm_model_df[0] - best_lgbm_model.regressor.set_params( - **best_lgbm_model_df["regressor"].dropna().to_dict() + .pipe(lambda df: df.loc[df.loc[:, "rank_test_score"].idxmin(), "params"]) ) - return best_lgbm_model.fit(X=sample.features, y=sample.target) + len_param_candidate = len(PARAM_CANDIDATE__) + return ( + best_lgbm_params["candidate"] + .clone() + .set_params( + **{ + param[len_param_candidate:]: value + for param, value in best_lgbm_params.items() + if param.startswith(PARAM_CANDIDATE__) + } + ) + .fit(X=sample.features, y=sample.target) + ) @pytest.fixture @@ -318,68 +330,68 @@ def iris_sample_binary_dual_target( ) +COL_PARAM = "param" +COL_CANDIDATE = "candidate" +COL_CANDIDATE_NAME = "candidate_name" +COL_CLASSIFIER = "classifier" +COL_REGRESSOR = "regressor" +COL_SCORE = ("score", "test", "mean") + + def check_ranking( ranking: pd.DataFrame, is_classifier: bool, - expected_scores: Sequence[float], - expected_parameters: Optional[Mapping[int, Mapping[str, Any]]], - expected_learners: Optional[List[Type[LearnerDF]]] = None, + scores_expected: Sequence[float], + params_expected: Optional[Mapping[int, Mapping[str, Any]]], + candidate_names_expected: Optional[Sequence[str]] = None, ) -> None: """ - Test helper to check rankings produced by learner rankers + Test helper to check rankings produced by learner rankers. :param ranking: summary data frame :param is_classifier: flag if ranking was performed on classifiers, or regressors - :param expected_scores: expected ranking scores, rounded to 3 decimal places - :param expected_parameters: expected learner parameters - :param expected_learners: optional list of expected learners. Should be present - only for multi estimator search. 
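[Editor's note] The COL_* constants above reflect the reshaped summary report:
labels such as "score__test__mean" are split on "__" into a three-level column
MultiIndex, with unused levels filled with "-". A hedged access sketch, assuming a
fitted ranker as produced by the fixtures in this file:

    report = regressor_ranker.summary_report()
    # mean test scores, one row per evaluated candidate configuration
    mean_test_scores = report.loc[:, ("score", "test", "mean")]
    # the rank column maps from the original "rank_test_score" record
    best_by_rank = report.sort_values(by=("score", "test", "rank")).head()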
- :return: None + :param scores_expected: expected ranking scores, rounded to 3 decimal places + :param params_expected: expected learner parameters + :param candidate_names_expected: optional list of expected learners; + only required for multi estimator search """ - SCORE_COLUMN = "mean_test_score" - CLASSIFIER_STR = "classifier" - REGRESSOR_STR = "regressor" - PARAM_CANDIDATE_STR = "param_candidate" - ESTIMATOR_COLUMN = "" - - def _select_parameters( - param_column: str, rank: int, learner_str: Optional[str] - ) -> Tuple[dict, Optional[LearnerDF]]: - raw_parameters = ranking[param_column][learner_str].iloc[rank].to_dict() - estimator = ( - ranking[param_column][ESTIMATOR_COLUMN].iloc[rank] - if ESTIMATOR_COLUMN in ranking[param_column] - else None - ) - return ( - {k: v for k, v in raw_parameters.items() if v is not np.nan}, - estimator, - ) - - for rank, score_expected in enumerate(expected_scores): - score_actual = round(ranking[SCORE_COLUMN].iloc[rank], 3) - assert score_actual == pytest.approx(score_expected, abs=0.1), ( - f"unexpected score for learner at rank #{rank + 1}: " - f"got {score_actual} but expected {score_expected}" - ) + col_score = COL_SCORE # + ("-",) * (ranking.columns.nlevels - len(COL_SCORE)) + scores_actual: pd.Series = ranking.loc[:, col_score].values[: len(scores_expected)] + assert_array_almost_equal( + scores_actual, + scores_expected, + decimal=3, + err_msg=( + f"unexpected scores: " f"got {scores_actual} but expected {scores_expected}" + ), + ) - learner_str = CLASSIFIER_STR if is_classifier else REGRESSOR_STR - param_column = PARAM_CANDIDATE_STR + col_learner = COL_CLASSIFIER if is_classifier else COL_REGRESSOR - if expected_parameters is not None: - for rank, parameters_expected in expected_parameters.items(): - parameters_actual, learner_actual = _select_parameters( - param_column, rank, learner_str + if params_expected is not None: + param_columns: pd.DataFrame = ranking.loc[:, (COL_PARAM, col_learner)] + for rank, parameters_expected in params_expected.items(): + parameters_actual: Dict[str, Any] = ( + param_columns.iloc[rank, :].dropna().to_dict() ) assert parameters_actual == parameters_expected, ( f"unexpected parameters for learner at rank #{rank}: " f"got {parameters_actual} but expected {parameters_expected}" ) - if learner_actual is not None: - assert isinstance( - getattr(learner_actual, learner_str), expected_learners[rank] - ) + + if candidate_names_expected: + candidates_actual: np.ndarray = ranking.loc[ + :, (COL_CANDIDATE_NAME, "-", "-") + ].values[: len(candidate_names_expected)] + assert_array_equal( + candidates_actual, + candidate_names_expected, + ( + f"unexpected candidate names: got {list(candidates_actual)} " + f"but expected {list(candidate_names_expected)}" + ), + ) @pytest.fixture diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py index dd619410..3a504c9c 100644 --- a/test/test/facet/test_crossfit.py +++ b/test/test/facet/test_crossfit.py @@ -18,7 +18,7 @@ def test_prediction_classifier( iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int ) -> None: - expected_learner_scores = [0.889, 0.886, 0.885, 0.879] + expected_learner_scores = [0.965, 0.964, 0.957, 0.956] # define parameters and crossfit ps1 = ParameterSpace( @@ -70,8 +70,8 @@ def test_prediction_classifier( check_ranking( ranking=ranking, is_classifier=True, - expected_scores=expected_learner_scores, - expected_parameters={ + scores_expected=expected_learner_scores, + params_expected={ 2: 
dict(min_samples_leaf=32, n_estimators=50), 3: dict(min_samples_leaf=32, n_estimators=80), }, diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index e8f28dcd..190cd539 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -56,10 +56,10 @@ def test_model_inspection( check_ranking( ranking=ranking, is_classifier=False, - expected_scores=( + scores_expected=( [0.693, 0.689, 0.677, 0.661, 0.615, 0.615, 0.367, 0.281, 0.281, 0.281] ), - expected_parameters=None, + params_expected=None, ) shap_values: pd.DataFrame = regressor_inspector.shap_values() @@ -102,7 +102,7 @@ def test_model_inspection( def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: - expected_learner_scores = [0.872, 0.868, 0.866, 0.859] + expected_learner_scores = [0.938, 0.936, 0.936, 0.929] ranking = iris_classifier_ranker_binary.summary_report() @@ -111,8 +111,8 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: check_ranking( ranking=ranking, is_classifier=True, - expected_scores=expected_learner_scores, - expected_parameters={ + scores_expected=expected_learner_scores, + params_expected={ 2: dict(min_samples_leaf=4, n_estimators=10), 3: dict(min_samples_leaf=8, n_estimators=10), }, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 610c9767..2913f9cf 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -1,8 +1,8 @@ """ Tests for module facet.selection """ - import logging +from typing import List import numpy as np import pandas as pd @@ -49,17 +49,20 @@ def test_model_ranker( 0.758, 0.758, ] - expected_learners = [ - RandomForestRegressorDF, - RandomForestRegressorDF, - LinearRegressionDF, - LinearRegressionDF, - AdaBoostRegressorDF, - AdaBoostRegressorDF, - LGBMRegressorDF, - LGBMRegressorDF, - LGBMRegressorDF, - LGBMRegressorDF, + expected_learners: List[str] = [ + cls.__name__ + for cls in ( + RandomForestRegressorDF, + RandomForestRegressorDF, + LinearRegressionDF, + LinearRegressionDF, + AdaBoostRegressorDF, + AdaBoostRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + LGBMRegressorDF, + ) ] expected_parameters = { 0: dict(n_estimators=80), @@ -84,7 +87,7 @@ def test_model_ranker( assert isinstance(ranker.best_estimator_, RegressorPipelineDF) ranking = ranker.summary_report() - ranking_score = ranking["mean_test_score"] + ranking_score = ranking[("score", "test", "mean")] assert len(ranking) > 0 assert all( @@ -95,9 +98,9 @@ def test_model_ranker( check_ranking( ranking=ranking, is_classifier=False, - expected_scores=expected_scores, - expected_parameters=expected_parameters, - expected_learners=expected_learners, + scores_expected=expected_scores, + params_expected=expected_parameters, + candidate_names_expected=expected_learners, ) @@ -140,15 +143,15 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: check_ranking( ranking=summary_report, is_classifier=True, - expected_scores=expected_learner_scores, - expected_parameters={ + scores_expected=expected_learner_scores, + params_expected={ 0: dict(C=10, kernel="linear"), 3: dict(C=1, kernel="rbf"), }, ) assert ( - summary_report["mean_test_score"].iloc[0] >= 0.8 + summary_report[("score", "test", "mean")].iloc[0] >= 0.8 ), "expected a best performance of at least 0.8" @@ -170,7 +173,7 @@ def test_parameter_space( preprocessing=simple_preprocessor, ) ps_1_name = "rf_regressor" - ps_1 = ParameterSpace(pipeline_1, candidate_name=ps_1_name) + ps_1 = 
ParameterSpace(pipeline_1, name=ps_1_name) ps_1.regressor.min_weight_fraction_leaf = loguniform_0_01_0_10 ps_1.regressor.max_depth = randint_3_10 ps_1.regressor.min_samples_leaf = loguniform_0_05_0_10 @@ -197,7 +200,7 @@ def test_parameter_space( preprocessing=simple_preprocessor, ) ps_2_name = "lgbm" - ps_2 = ParameterSpace(pipeline_2, candidate_name=ps_2_name) + ps_2 = ParameterSpace(pipeline_2, name=ps_2_name) ps_2.regressor.max_depth = randint_3_10 ps_2.regressor.min_child_samples = zipfian_1_32 @@ -219,30 +222,38 @@ def test_parameter_space( # test + def regressor_repr(model: Id): + return Id.RegressorPipelineDF( + preprocessing=Id.ColumnTransformerDF( + transformers=[ + ( + "impute", + Id.SimpleImputerDF(strategy="median"), + ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE"] + + ["DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"], + ) + ] + ), + regressor=model(random_state=42), + ) + assert freeze(mps.to_expression()) == freeze( Id.MultiEstimatorParameterSpace( - None, - [ - Id.ParameterSpace( - candidate=pipeline_1.to_expression(), - **{ - "candidate.regressor.min_weight_fraction_leaf": ( - Id.loguniform(0.01, 0.1) - ), - "candidate.regressor.max_depth": Id.randint(3, 10), - "candidate.regressor.min_samples_leaf": ( - Id.loguniform(0.05, 0.1) - ), - }, - ), - Id.ParameterSpace( - candidate=pipeline_2.to_expression(), - **{ - "candidate.regressor.max_depth": Id.randint(3, 10), - "candidate.regressor.min_child_samples": Id.zipfian(1.0, 32), - }, - ), - ], + Id.ParameterSpace( + regressor_repr(Id.RandomForestRegressorDF), + **{ + "regressor.min_weight_fraction_leaf": (Id.loguniform(0.01, 0.1)), + "regressor.max_depth": Id.randint(3, 10), + "regressor.min_samples_leaf": (Id.loguniform(0.05, 0.1)), + }, + ), + Id.ParameterSpace( + regressor_repr(Id.LGBMRegressorDF), + **{ + "regressor.max_depth": Id.randint(3, 10), + "regressor.min_child_samples": Id.zipfian(1.0, 32), + }, + ), ) ) @@ -300,7 +311,7 @@ def test_learner_ranker( assert len(report_df) > 0 assert isinstance(report_df, pd.DataFrame) - scores_sr: pd.Series = report_df.loc[:, "mean_test_score"] + scores_sr: pd.Series = report_df.loc[:, ("score", "test", "mean")] assert all( score_hi >= score_lo for score_hi, score_lo in zip(scores_sr, scores_sr[1:]) ) From 8e1f46ac4b46ccb487707f4da11ee358992d6f64 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:28:34 +0100 Subject: [PATCH 081/106] TEST: eliminate test_crossfit.py --- test/test/facet/test_crossfit.py | 78 ------------------------------- test/test/facet/test_selection.py | 70 +++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 81 deletions(-) delete mode 100644 test/test/facet/test_crossfit.py diff --git a/test/test/facet/test_crossfit.py b/test/test/facet/test_crossfit.py deleted file mode 100644 index 3a504c9c..00000000 --- a/test/test/facet/test_crossfit.py +++ /dev/null @@ -1,78 +0,0 @@ -import logging - -import pytest -from sklearn.model_selection import GridSearchCV - -from sklearndf.classification import RandomForestClassifierDF -from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF -from sklearndf.regression import RandomForestRegressorDF - -from ..conftest import check_ranking -from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace -from facet.validation import StratifiedBootstrapCV - -log = logging.getLogger(__name__) - - -def test_prediction_classifier( - iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int -) -> None: - - expected_learner_scores = [0.965, 
0.964, 0.957, 0.956] - - # define parameters and crossfit - ps1 = ParameterSpace( - ClassifierPipelineDF(classifier=RandomForestClassifierDF(random_state=42)) - ) - ps1.classifier.min_samples_leaf = [16, 32] - ps1.classifier.n_estimators = [50, 80] - - ps2 = ParameterSpace( - RegressorPipelineDF(regressor=RandomForestRegressorDF(random_state=42)) - ) - ps2.regressor.min_samples_leaf = [16, 32] - ps2.regressor.n_estimators = [50, 80] - - with pytest.raises( - TypeError, - match=( - "^all candidate estimators must have the same estimator type, " - "but got multiple types: classifier, regressor$" - ), - ): - # define an illegal grid list, mixing classification with regression - MultiEstimatorParameterSpace(ps1, ps2) - - model_ranker: LearnerRanker[ - ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV - ] = LearnerRanker( - searcher_factory=GridSearchCV, - parameter_space=ps1, - cv=cv_stratified_bootstrap, - scoring="f1_macro", - n_jobs=n_jobs, - ) - - with pytest.raises( - ValueError, - match="arg sample_weight is not supported, use arg sample.weight instead", - ): - model_ranker.fit( - sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight - ) - - model_ranker.fit(sample=iris_sample_multi_class) - - ranking = model_ranker.summary_report() - - log.debug(f"\n{ranking}") - - check_ranking( - ranking=ranking, - is_classifier=True, - scores_expected=expected_learner_scores, - params_expected={ - 2: dict(min_samples_leaf=32, n_estimators=50), - 3: dict(min_samples_leaf=32, n_estimators=80), - }, - ) diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 2913f9cf..466ee202 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -14,7 +14,7 @@ from pytools.expression import freeze from pytools.expression.atomic import Id from sklearndf import TransformerDF -from sklearndf.classification import SVCDF +from sklearndf.classification import SVCDF, RandomForestClassifierDF from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF from sklearndf.regression import ( AdaBoostRegressorDF, @@ -26,7 +26,7 @@ from ..conftest import check_ranking from facet.data import Sample from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace -from facet.validation import BootstrapCV +from facet.validation import BootstrapCV, StratifiedBootstrapCV log = logging.getLogger(__name__) @@ -276,7 +276,7 @@ def regressor_repr(model: Id): ] -def test_learner_ranker( +def test_learner_ranker_regression( regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, n_jobs: int, @@ -315,3 +315,67 @@ def test_learner_ranker( assert all( score_hi >= score_lo for score_hi, score_lo in zip(scores_sr, scores_sr[1:]) ) + + +def test_learner_ranker_classification( + iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int +) -> None: + + expected_learner_scores = [0.965, 0.964, 0.957, 0.956] + + # define parameters and crossfit + ps1 = ParameterSpace( + ClassifierPipelineDF(classifier=RandomForestClassifierDF(random_state=42)) + ) + ps1.classifier.min_samples_leaf = [16, 32] + ps1.classifier.n_estimators = [50, 80] + + ps2 = ParameterSpace( + RegressorPipelineDF(regressor=RandomForestRegressorDF(random_state=42)) + ) + ps2.regressor.min_samples_leaf = [16, 32] + ps2.regressor.n_estimators = [50, 80] + + with pytest.raises( + TypeError, + match=( + "^all candidate estimators must have the same estimator type, " + "but got multiple types: 
classifier, regressor$" + ), + ): + # define an illegal grid list, mixing classification with regression + MultiEstimatorParameterSpace(ps1, ps2) + + model_ranker: LearnerRanker[ + ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV + ] = LearnerRanker( + searcher_factory=GridSearchCV, + parameter_space=ps1, + cv=cv_stratified_bootstrap, + scoring="f1_macro", + n_jobs=n_jobs, + ) + + with pytest.raises( + ValueError, + match="arg sample_weight is not supported, use arg sample.weight instead", + ): + model_ranker.fit( + sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight + ) + + model_ranker.fit(sample=iris_sample_multi_class) + + ranking = model_ranker.summary_report() + + log.debug(f"\n{ranking}") + + check_ranking( + ranking=ranking, + is_classifier=True, + scores_expected=expected_learner_scores, + params_expected={ + 2: dict(min_samples_leaf=32, n_estimators=50), + 3: dict(min_samples_leaf=32, n_estimators=80), + }, + ) From fde865735bf6bc547620c24bf219cb748814f72c Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:29:27 +0100 Subject: [PATCH 082/106] DEBUG: remove a debug message --- src/facet/selection/_selection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e1df9a60..0d6f1e39 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -421,7 +421,6 @@ def _get_scorer( elif isinstance(scoring, str): scorer = get_scorer(scoring) - print(f"{scoring} --> {scorer}") # noinspection PyPep8Naming def _scorer_fn(estimator: EstimatorDF, X: pd.DataFrame, y: pd.Series) -> float: From 02d02fcdec9a5604255c189084df90f432bfd14f Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 12:56:24 +0100 Subject: [PATCH 083/106] API: remove module facet.crossfit --- src/facet/crossfit/__init__.py | 12 - src/facet/crossfit/_crossfit.py | 598 -------------------------------- 2 files changed, 610 deletions(-) delete mode 100644 src/facet/crossfit/__init__.py delete mode 100644 src/facet/crossfit/_crossfit.py diff --git a/src/facet/crossfit/__init__.py b/src/facet/crossfit/__init__.py deleted file mode 100644 index b7066746..00000000 --- a/src/facet/crossfit/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Meta-estimator containing a fitted estimator for each cross-validation training -split; used as the basis for learner selection and inspection. - -:class:`.LearnerCrossfit` encapsulates a fully trained pipeline. -It contains a :class:`~.sklearndf.LearnerPipelineDF` (preprocessing and learner), -a dataset given by a :class:`.Sample` object, and a -cross-validator. -The pipeline is fitted accordingly. 
-""" - -from ._crossfit import * diff --git a/src/facet/crossfit/_crossfit.py b/src/facet/crossfit/_crossfit.py deleted file mode 100644 index e6e1c3b5..00000000 --- a/src/facet/crossfit/_crossfit.py +++ /dev/null @@ -1,598 +0,0 @@ -""" -Core implementation of :mod:`facet.crossfit` -""" - -import logging -from abc import ABCMeta -from copy import copy -from typing import ( - Any, - Callable, - Container, - Dict, - Generic, - Iterable, - Iterator, - List, - NamedTuple, - Optional, - Sequence, - Tuple, - TypeVar, - Union, -) - -import numpy as np -import pandas as pd -from numpy.random.mtrand import RandomState -from sklearn.base import BaseEstimator -from sklearn.metrics import check_scoring -from sklearn.model_selection import BaseCrossValidator - -from pytools.api import AllTracker, inheritdoc -from pytools.fit import FittableMixin -from pytools.parallelization import Job, JobQueue, JobRunner, ParallelizableMixin -from sklearndf import LearnerDF, TransformerDF -from sklearndf.pipeline import ( - ClassifierPipelineDF, - LearnerPipelineDF, - RegressorPipelineDF, -) - -from facet.data import Sample - -log = logging.getLogger(__name__) - -__all__ = ["LearnerCrossfit"] - -# -# Type variables -# - -T_Self = TypeVar("T_Self") -T_LearnerPipelineDF = TypeVar("T_LearnerPipelineDF", bound=LearnerPipelineDF) -T_ClassifierPipelineDF = TypeVar("T_ClassifierPipelineDF", bound=ClassifierPipelineDF) -T_RegressorPipelineDF = TypeVar("T_RegressorPipelineDF", bound=RegressorPipelineDF) - - -# -# Type aliases -# - - -# a scorer generated by :func:`sklearn.metrics.make_scorer` -Scorer = Callable[ - [ - # trained learner to use for scoring - BaseEstimator, - # test data that will be fed to the learner - pd.DataFrame, - # target values for X - Union[pd.Series, pd.DataFrame], - # sample weights - Optional[pd.Series], - ], - # result of applying score function to estimator applied to X - float, -] - -# the result of calling a fit/score job -FitResult = Tuple[Optional[LearnerPipelineDF], Optional[float]] - - -# -# Ensure all symbols introduced below are included in __all__ -# - -__tracker = AllTracker(globals()) - - -# -# Class definitions -# - - -class _FitScoreParameters(NamedTuple): - pipeline: LearnerPipelineDF - - # fit parameters - train_features: Optional[pd.DataFrame] - train_target: Union[pd.Series, pd.DataFrame, None] - train_weight: Optional[pd.Series] - - # score parameters - scorer: Optional[Scorer] = None - score_train_split: bool = False - test_features: Optional[pd.DataFrame] = None - test_target: Union[pd.Series, pd.DataFrame, None] = None - test_weight: Optional[pd.Series] = None - - -@inheritdoc(match="[see superclass]") -class LearnerCrossfit( - FittableMixin[Sample], - ParallelizableMixin, - Generic[T_LearnerPipelineDF], - metaclass=ABCMeta, -): - """ - Fits a learner pipeline to all train splits of a given cross-validation strategy, - with optional feature shuffling. - - Feature shuffling can be helpful when fitting models with a data set that contains - very similar features. - For such groups of similar features, some learners may pick features based on their - relative position in the training data table. - Feature shuffling randomizes the sequence of features for each cross-validation - training sample, thus ensuring that all similar features have the same chance of - being used across crossfits. 
- - Feature shuffling is active by default, so that every model is trained on a random - permutation of the feature columns to avoid favouring one of several similar - features based on column sequence. - """ - - __NO_SCORING = "" - - def __init__( - self, - pipeline: T_LearnerPipelineDF, - cv: BaseCrossValidator, - *, - random_state: Union[int, RandomState, None] = None, - n_jobs: Optional[int] = None, - shared_memory: Optional[bool] = None, - pre_dispatch: Optional[Union[str, int]] = None, - verbose: Optional[int] = None, - ) -> None: - """ - :param pipeline: learner pipeline to be fitted - :param cv: the cross-validator generating the train splits - :param random_state: optional random seed or random state for shuffling the - feature column order - """ - super().__init__( - n_jobs=n_jobs, - shared_memory=shared_memory, - pre_dispatch=pre_dispatch, - verbose=verbose, - ) - - if not isinstance(pipeline, LearnerPipelineDF): - raise TypeError("arg pipeline must be a LearnerPipelineDF") - self.pipeline: T_LearnerPipelineDF = pipeline.clone() - - if not hasattr(cv, "split"): - raise TypeError( - "arg cv must be a cross-validator implementing method split()" - ) - self.cv = cv - - self.random_state = random_state - - self._splits: Optional[Sequence[Tuple[Sequence[int], Sequence[int]]]] = None - self._model_by_split: Optional[Sequence[T_LearnerPipelineDF]] = None - self._sample: Optional[Sample] = None - - __init__.__doc__ += ParallelizableMixin.__init__.__doc__ - - @property - def is_fitted(self) -> bool: - """[see superclass]""" - return self._sample is not None - - @property - def n_splits_(self) -> int: - """ - The number of fits in this crossfit. - """ - self._ensure_fitted() - return len(self._model_by_split) - - @property - def sample_(self) -> Sample: - """ - The sample used to train this crossfit. - """ - self._ensure_fitted() - return self._sample - - def fit(self: T_Self, sample: Sample, **fit_params: Any) -> T_Self: - """ - Fit the underlying pipeline to the full sample, and fit clones of the pipeline - to each of the train splits generated by the cross-validator. - - :param sample: the sample to fit the estimators to; if the sample - weights these are passed on to the learner as keyword argument - ``sample_weight`` - :param fit_params: optional fit parameters, to be passed on to the fit method - of the base estimator - :return: ``self`` - """ - - self: LearnerCrossfit # support type hinting in PyCharm - - # un-fit this instance so we have a defined state in case of an exception - self._reset_fit() - - self._run(self._fit_score_queue(_sample=sample, **fit_params)) - - return self - - def score( - self, - scoring: Union[str, Callable[[float, float], float], None] = None, - train_scores: bool = False, - ) -> np.ndarray: - """ - Score all models in this crossfit using the given scoring function. - - The crossfit must already be fitted, see :meth:`.fit`. 
- - :param scoring: scoring to use to score the models (see - :func:`~sklearn.metrics.check_scoring` for details); if the crossfit - was fitted using sample weights, these are passed on to the scoring - function as keyword argument ``sample_weight`` - :param train_scores: if ``True``, calculate train scores instead of test - scores (default: ``False``) - :return: the resulting scores as a 1d numpy array - """ - - return self._run( - self._fit_score_queue(_scoring=scoring, _train_scores=train_scores) - ) - - def fit_score( - self, - sample: Sample, - scoring: Union[str, Callable[[float, float], float], None] = None, - train_scores: bool = False, - **fit_params: Any, - ) -> np.ndarray: - """ - Fit then score this crossfit. - - See :meth:`.fit` and :meth:`.score` for details. - - :param sample: the sample to fit the estimators to; if the sample - weights these are passed on to the learner and scoring function as - keyword argument ``sample_weight`` - :param scoring: scoring function to use to score the models - (see :func:`~sklearn.metrics.check_scoring` for details) - :param train_scores: if ``True``, calculate train scores instead of test - scores (default: ``False``) - :param fit_params: optional fit parameters, to be passed on to the fit method - of the learner - :return: the resulting scores - """ - - # un-fit this instance so we have a defined state in case of an exception - self._reset_fit() - - return self._run( - self._fit_score_queue( - _sample=sample, - _scoring=scoring, - _train_scores=train_scores, - **fit_params, - ) - ) - - def fit_score_queue( - self, - sample: Sample, - scoring: Union[str, Callable[[float, float], float], None] = None, - train_scores: bool = False, - **fit_params: Any, - ) -> JobQueue[FitResult, Optional[np.ndarray]]: - """ - Create a :class:`pytools.parallelization.JobQueue` that fits then scores this - crossfit. - - See :meth:`.fit` and :meth:`.score` for details on fitting and scoring. - - :param sample: the sample to fit the estimators to; if the sample - weights these are passed on to the learner and scoring function as - keyword argument ``sample_weight`` - :param scoring: scoring function to use to score the models - (see :func:`~sklearn.metrics.check_scoring` for details) - :param train_scores: if ``True``, calculate train scores instead of test - scores (default: ``False``) - :param fit_params: optional fit parameters, to be passed on to the fit method - of the learner - :return: the job queue - """ - - return self._fit_score_queue( - _sample=sample, - _scoring=scoring, - _train_scores=train_scores, - **fit_params, - ) - - def resize(self: T_Self, n_splits: int) -> T_Self: - """ - Reduce the size of this crossfit by removing a subset of the fits. - - :param n_splits: the number of fits to keep. 
Must be lower, or equal to, the - current number of fits - :return: ``self`` - """ - self: LearnerCrossfit - - # ensure that arg n_split has a valid value - if n_splits > self.n_splits_: - raise ValueError( - f"arg n_splits={n_splits} must not be greater than the number of fits" - f"in the original crossfit ({self.n_splits_} fits)" - ) - elif n_splits < 1: - raise ValueError(f"arg n_splits={n_splits} must be a positive integer") - - # copy self and only keep the specified number of fits - new_crossfit = copy(self) - new_crossfit._model_by_split = self._model_by_split[:n_splits] - new_crossfit._splits = self._splits[:n_splits] - return new_crossfit - - def splits(self) -> Iterator[Tuple[Sequence[int], Sequence[int]]]: - """ - Get an iterator of all train/test splits used by this crossfit. - - :return: an iterator of all train/test splits used by this crossfit - """ - self._ensure_fitted() - - # ensure we do not return more splits than we have fitted models - # this is relevant if this is a resized learner crossfit - return iter(self._splits) - - def models(self) -> Iterator[T_LearnerPipelineDF]: - """ - Get an iterator of all models fitted on the cross-validation train splits. - - :return: an iterator of all models fitted on the cross-validation train splits - """ - self._ensure_fitted() - return iter(self._model_by_split) - - # noinspection PyPep8Naming - def _fit_score_queue( - self, - _sample: Optional[Sample] = None, - _scoring: Union[str, Callable[[float, float], float], None] = __NO_SCORING, - _train_scores: bool = False, - sample_weight: pd.Series = None, - **fit_params, - ) -> JobQueue[FitResult, Optional[np.ndarray]]: - - if sample_weight is not None: - raise ValueError( - "do not use arg sample_weight to pass sample weights; " - "specify a weight column in class Sample instead" - ) - - do_fit = _sample is not None - do_score = _scoring is not LearnerCrossfit.__NO_SCORING - - assert do_fit or do_score, "at least one of fitting or scoring is enabled" - - pipeline = self.pipeline - - if not do_fit: - _sample = self.sample_ - - sample_weight = _sample.weight - - features = _sample.features - target = _sample.target - - global_fit: Optional[Job[FitResult]] - if do_fit: - global_fit = _FitModelOnFullData( - parameters=_FitScoreParameters( - pipeline=pipeline, - train_features=features, - train_target=target, - train_weight=sample_weight, - ), - fit_params=fit_params, - ) - else: - global_fit = None - - # prepare scoring - - scorer: Optional[Scorer] - - if do_score: - if not isinstance(_scoring, str) and isinstance(_scoring, Container): - raise ValueError( - "Multi-metric scoring is not supported, " - "use a single scorer instead; " - f"arg scoring={_scoring} was passed" - ) - - scorer = check_scoring( - estimator=self.pipeline.final_estimator.native_estimator, - scoring=_scoring, - ) - else: - scorer = None - - # calculate the splits: we need to preserve them as we cannot rely on the - # cross-validator being deterministic - - if do_fit: - splits: Sequence[Tuple[Sequence[int], Sequence[int]]] = tuple( - self.cv.split(X=features, y=target) - ) - else: - splits = self._splits - - # generate parameter objects for fitting and/or scoring each split - - def _generate_parameters() -> Iterator[_FitScoreParameters]: - test_scores: bool = do_score and not _train_scores - models: Iterable[T_LearnerPipelineDF] = ( - iter(lambda: None, 0) if do_fit else self.models() - ) - weigh_samples: bool = sample_weight is not None - - for (train, test), model in zip(splits, models): - yield 
_FitScoreParameters( - pipeline=pipeline.clone() if do_fit else model, - train_features=( - features.iloc[train] if do_fit or _train_scores else None - ), - train_target=target.iloc[train] if do_fit else None, - train_weight=( - sample_weight.iloc[train] - if weigh_samples and (do_fit or _train_scores) - else None - ), - scorer=scorer, - score_train_split=_train_scores, - test_features=features.iloc[test] if test_scores else None, - test_target=target.iloc[test] if test_scores else None, - test_weight=( - sample_weight.iloc[test] - if weigh_samples and test_scores - else None - ), - ) - - crossfit = self - - # noinspection PyMissingOrEmptyDocstring - class _FitScoreQueue(JobQueue[FitResult, Optional[np.ndarray]]): - def jobs(self) -> Iterable[Job[FitResult]]: - splits_fit = ( - _FitAndScoreModelForSplit(parameters, fit_params) - for parameters in _generate_parameters() - ) - if do_fit: - return (global_fit, *splits_fit) - else: - return splits_fit - - def on_run(self) -> None: - if do_fit: - crossfit._reset_fit() - - def aggregate(self, job_results: List[FitResult]) -> Optional[np.ndarray]: - models, scores = zip(*job_results) - - if do_fit: - crossfit.pipeline = models[0] - assert scores[0] is None - scores = scores[1:] - crossfit._splits = splits - crossfit._model_by_split = models[1:] - crossfit._sample = _sample - - return np.array(scores) if do_score else None - - def __len__(self) -> int: - return len(splits) + int(do_fit) - - return _FitScoreQueue() - - def _run( - self, queue: JobQueue[FitResult, Optional[np.ndarray]] - ) -> Optional[np.ndarray]: - return JobRunner.from_parallelizable(self).run_queue(queue) - - def _reset_fit(self) -> None: - self._sample = None - self._splits = None - self._model_by_split = None - - def __len__(self) -> int: - return self.n_splits_ - - -class _BaseFitAndScore(Job[FitResult], metaclass=ABCMeta): - def __init__( - self, parameters: _FitScoreParameters, fit_params: Dict[str, Any] - ) -> None: - self.parameters = parameters - self.fit_params = fit_params - - -class _FitAndScoreModelForSplit(_BaseFitAndScore): - def run(self) -> FitResult: - """ - Fit and/or score a learner pipeline. 
- - :return: a tuple with the the fitted pipeline and the score - """ - parameters = self.parameters - - do_fit = parameters.train_target is not None - do_score = parameters.scorer is not None - - pipeline: LearnerPipelineDF - - if do_fit: - pipeline = parameters.pipeline.fit( - X=parameters.train_features, - y=parameters.train_target, - sample_weight=parameters.train_weight, - **self.fit_params, - ) - - else: - pipeline = parameters.pipeline - - score: Optional[float] - - if do_score: - preprocessing: TransformerDF = pipeline.preprocessing - learner: LearnerDF = pipeline.final_estimator - - if parameters.score_train_split: - features = parameters.train_features - target = parameters.train_target - weight = parameters.train_weight - else: - features = parameters.test_features - target = parameters.test_target - weight = parameters.test_weight - - if preprocessing: - features = preprocessing.transform(X=features) - - score = parameters.scorer( - learner.native_estimator, features, target, weight - ) - - else: - score = None - - return pipeline if do_fit else None, score - - -class _FitModelOnFullData(_BaseFitAndScore): - # noinspection PyMissingOrEmptyDocstring - def run(self) -> FitResult: - parameters = self.parameters - pipeline = parameters.pipeline - - if parameters.train_weight is None: - pipeline.fit( - X=parameters.train_features, - y=parameters.train_target, - **self.fit_params, - ) - else: - pipeline.fit( - X=parameters.train_features, - y=parameters.train_target, - sample_weight=parameters.train_weight, - **self.fit_params, - ) - return (pipeline, None) - - -__tracker.validate() From 7991b5814fd299d70eaf656bdad301d3917e5e5f Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 13:02:48 +0100 Subject: [PATCH 084/106] API: rename LearnerRanker param searcher_factory to searcher_type --- src/facet/selection/_selection.py | 20 ++++++++++---------- test/test/conftest.py | 4 ++-- test/test/facet/test_selection.py | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 0d6f1e39..fa48db37 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -122,7 +122,7 @@ class LearnerRanker( def __init__( self, - searcher_factory: Callable[..., T_SearchCV], + searcher_type: Callable[..., T_SearchCV], parameter_space: BaseParameterSpace, *, cv: Optional[BaseCrossValidator] = None, @@ -142,7 +142,7 @@ def __init__( **searcher_params: Any, ) -> None: """ - :param searcher_factory: a cross-validation searcher class, or any other + :param searcher_type: a cross-validation searcher class, or any other callable that instantiates a cross-validation searcher :param parameter_space: the parameter space to search :param cv: a cross validator (e.g., @@ -166,7 +166,7 @@ def __init__( verbose=verbose, ) - self.searcher_factory = searcher_factory + self.searcher_type = searcher_type self.parameter_space = parameter_space self.cv = cv self.scoring = scoring @@ -177,13 +177,13 @@ def __init__( # validate parameters for the searcher factory # - if not callable(searcher_factory): + if not callable(searcher_type): raise TypeError( - "arg searcher_factory expected to be a callable, " - f"but is a {type(searcher_factory).__name__}" + "arg searcher_type expected to be a callable, " + f"but is a {type(searcher_type).__name__}" ) - searcher_factory_params = inspect.signature(searcher_factory).parameters.keys() + searcher_factory_params = 
inspect.signature(searcher_type).parameters.keys() # raise an error if the searcher params include the searcher's first two # positional arguments @@ -194,7 +194,7 @@ def __init__( if reserved_params_overrides: raise ValueError( "arg searcher_params must not include the first two positional " - "arguments of arg searcher_factory, but included: " + "arguments of arg searcher_type, but included: " + ", ".join(reserved_params_overrides) ) @@ -205,7 +205,7 @@ def __init__( if unsupported_params: raise TypeError( - "parameters not supported by arg searcher_factory: " + "parameters not supported by arg searcher_type: " + ", ".join(unsupported_params) ) @@ -282,7 +282,7 @@ def fit( parameter_space = self.parameter_space searcher: BaseSearchCV - searcher = self.searcher_ = self.searcher_factory( + searcher = self.searcher_ = self.searcher_type( parameter_space.estimator, parameter_space.parameters, **self._get_searcher_parameters(), diff --git a/test/test/conftest.py b/test/test/conftest.py index 320e57ce..18c54c9a 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -166,7 +166,7 @@ def regressor_ranker( n_jobs: int, ) -> LearnerRanker[RegressorPipelineDF, GridSearchCV]: return LearnerRanker( - searcher_factory=GridSearchCV, + searcher_type=GridSearchCV, parameter_space=regressor_parameters, cv=cv_kfold, scoring="r2", @@ -473,7 +473,7 @@ def fit_classifier_ranker( # pipeline inspector only supports binary classification, # therefore filter the sample down to only 2 target classes return LearnerRanker( - searcher_factory=GridSearchCV, + searcher_type=GridSearchCV, parameter_space=parameter_space, cv=cv, scoring="f1_macro", diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 466ee202..64f320da 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -75,7 +75,7 @@ def test_model_ranker( cv = BootstrapCV(n_splits=5, random_state=42) ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV] = LearnerRanker( - searcher_factory=GridSearchCV, + searcher_type=GridSearchCV, parameter_space=regressor_parameters, cv=cv, scoring="r2", @@ -129,7 +129,7 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: model_ranker: LearnerRanker[ ClassifierPipelineDF[SVCDF], GridSearchCV ] = LearnerRanker( - searcher_factory=GridSearchCV, + searcher_type=GridSearchCV, parameter_space=parameter_space, cv=cv, n_jobs=n_jobs, @@ -289,7 +289,7 @@ def test_learner_ranker_regression( ValueError, match=( "arg searcher_params must not include the first two positional arguments " - "of arg searcher_factory, but included: param_grid" + "of arg searcher_type, but included: param_grid" ), ): LearnerRanker(GridSearchCV, regressor_parameters, param_grid=None) @@ -349,7 +349,7 @@ def test_learner_ranker_classification( model_ranker: LearnerRanker[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV ] = LearnerRanker( - searcher_factory=GridSearchCV, + searcher_type=GridSearchCV, parameter_space=ps1, cv=cv_stratified_bootstrap, scoring="f1_macro", From 968e5f1f73f0aba947ae7fdd9044fd7b57d38411 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 13:12:08 +0100 Subject: [PATCH 085/106] API: eliminate LearnerRanker parameter random_state --- src/facet/selection/_selection.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index fa48db37..0cae6039 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -21,7 +21,6 @@ import 
numpy as np import pandas as pd -from numpy.random.mtrand import RandomState from sklearn.metrics import get_scorer from sklearn.model_selection import BaseCrossValidator, GridSearchCV @@ -134,7 +133,6 @@ def __init__( ], None, ] = None, - random_state: Union[int, RandomState, None] = None, n_jobs: Optional[int] = None, shared_memory: Optional[bool] = None, pre_dispatch: Optional[Union[str, int]] = None, @@ -151,8 +149,6 @@ def __init__( learners (optional; use learner's default scorer if not specified here). If passing a callable, the ``"score"`` will be used as the name of the scoring function unless the callable defines a ``__name__`` attribute - :param random_state: optional random seed or random state for shuffling the - feature column order %%PARALLELIZABLE_PARAMS%% :param searcher_params: additional parameters to be passed on to the searcher; must not include the first two positional arguments of the searcher @@ -170,7 +166,6 @@ def __init__( self.parameter_space = parameter_space self.cv = cv self.scoring = scoring - self.random_state = random_state self.searcher_params = searcher_params # @@ -400,7 +395,6 @@ def _get_searcher_parameters(self) -> Dict[str, Any]: for k, v in dict( cv=self.cv, scoring=self._get_scorer(), - random_state=self.random_state, n_jobs=self.n_jobs, shared_memory=self.shared_memory, pre_dispatch=self.pre_dispatch, From c14272b771c6736058cdc3a941f1b17e022612f6 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 13:12:54 +0100 Subject: [PATCH 086/106] DOC: add docstrings for LearnerRanker attributes --- src/facet/selection/_selection.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 0cae6039..766d91a0 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -97,6 +97,30 @@ class LearnerRanker( algorithm and optimize hyper-parameters. """ + #: A cross-validation searcher class, or any other callable + #: that instantiates a cross-validation searcher. + searcher_type: Callable[..., T_SearchCV] + + #: The parameter space to search. + parameter_space: BaseParameterSpace + + #: The cross-validator to be used by the searcher. + cv: Optional[BaseCrossValidator] + + #: The scoring function (by name, or as a callable) to be used by the searcher + #: (optional; use learner's default scorer if not specified here) + scoring: Union[ + str, + Callable[ + [EstimatorDF, pd.Series, pd.Series], + float, + ], + None, + ] + + #: Additional parameters to be passed on to the searcher. + searcher_params: Dict[str, Any] + #: The searcher used to fit this LearnerRanker; ``None`` if not fitted. 
searcher_: Optional[T_SearchCV] From 3392b0766c93ddf88542f4fbcb561b85e4d18dc7 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 13:13:11 +0100 Subject: [PATCH 087/106] DOC: tweak a docstring --- src/facet/selection/_selection.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 766d91a0..e2d9ef8e 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -167,17 +167,17 @@ def __init__( :param searcher_type: a cross-validation searcher class, or any other callable that instantiates a cross-validation searcher :param parameter_space: the parameter space to search - :param cv: a cross validator (e.g., - :class:`.BootstrapCV`) - :param scoring: a scoring function (by name, or as a callable) for evaluating - learners (optional; use learner's default scorer if not specified here). - If passing a callable, the ``"score"`` will be used as the name of the + :param cv: the cross-validator to be used by the searcher + (e.g., :class:`~sklearn.model_selection.RepeatedKFold`) + :param scoring: a scoring function (by name, or as a callable) to be used by the + searcher (optional; use learner's default scorer if not specified here). + If passing a callable, ``"score"`` will be used as the name of the scoring function unless the callable defines a ``__name__`` attribute %%PARALLELIZABLE_PARAMS%% :param searcher_params: additional parameters to be passed on to the searcher; must not include the first two positional arguments of the searcher constructor used to pass the estimator and the search space, since these - will be populated using arg ``parameter_space`` + will be populated from arg ``parameter_space`` """ super().__init__( n_jobs=n_jobs, From b88aa9d17d5e553e33eebbcb97ac0bdf8c52f880 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 3 Feb 2022 23:07:04 +0100 Subject: [PATCH 088/106] DOC: update release notes --- RELEASE_NOTES.rst | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 67a23ebb..41cb6b10 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -7,6 +7,19 @@ FACET 2.0 2.0.0 ~~~~~ +``facet.data`` +^^^^^^^^^^^^^^ + +- API: class :class:`.RangePartitioner` adds optional arguments ``lower_bound`` and + ``upper_bound`` to method :meth:`~.RangePartitioner.fit` and no longer accepts them + in the class initializer + +``facet.inspection`` +^^^^^^^^^^^^^^^^^^^ + +- API: :class:`.LearnerInspector` no longer depends on learner crossfits and instead + inspects models using a single pass of SHAP calculations, usually leading to + performance gains of up to a factor of 50 - API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances - API: diagonals of feature synergy, redundancy, and association matrices are now ``nan`` instead of 1.0 @@ -17,12 +30,30 @@ FACET 2.0 between adjacent leaves The old sorting behaviour of FACET 1.x can be restored using method :meth:`.LinkageTree.sort_by_weight` -- API: class :class:`.RangePartitioner` adds optional arguments ``lower_bound`` and - ``upper_bound`` to method :meth:`~.RangePartitioner.fit` and no longer accepts them - in the class initializer + +``facet.selection`` +^^^^^^^^^^^^^^^^^^^ + +- API: :class:`.LearnerRanker` completely rewritten to work with native *scikit-learn* + searchers such as :class:`.GridSearchCV` or :class:`.RandomizedSearchCV` +- API: new classes 
:class:`.ParameterSpace` and :class:`MultiParameterSpace` offer an + easier and less error-prone method of defining search spaces for hyperparameter + tuning. + +``facet.simulation`` +^^^^^^^^^^^^^^^^^^^^ + +- API: simulations no longer depend on learner crossfits and instead are carried out + as a single pass on the full dataset, using the standard error of mean predictions + to obtain confidence intervals that are less conservative and more realistic - VIZ: minor tweaks to simulation plots and reports generated by :class:`.SimulationDrawer` +Other +^^^^^ + +- API: class ``LearnerCrossfit`` is no longer used in FACET 2.0 and has been removed + FACET 1.2 --------- From 43a7c6c126fedaeec6fb1eb51fd61fe211855848 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 17:36:09 +0100 Subject: [PATCH 089/106] FIX: honour arg prefix in MultiEstimatorParameterSpace.get_parameters() --- src/facet/selection/_parameters.py | 14 +++++++++++--- test/test/facet/test_selection.py | 23 ++++++++++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 081bae82..e09977ba 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -310,11 +310,19 @@ def __init__(self, *spaces: ParameterSpace[T_Candidate_co]) -> None: ) def get_parameters(self, prefix: Optional[str] = None) -> List[ParameterDict]: """[see superclass]""" + + if prefix is None: + prefix = "" + candidate_prefixed = CandidateEstimatorDF.PARAM_CANDIDATE + else: + prefix = f"{prefix}__" + candidate_prefixed = prefix + CandidateEstimatorDF.PARAM_CANDIDATE + return [ { - CandidateEstimatorDF.PARAM_CANDIDATE: [space.estimator], - CandidateEstimatorDF.PARAM_CANDIDATE_NAME: [space.get_name()], - **space.get_parameters(prefix=CandidateEstimatorDF.PARAM_CANDIDATE), + candidate_prefixed: [space.estimator], + prefix + CandidateEstimatorDF.PARAM_CANDIDATE_NAME: [space.get_name()], + **space.get_parameters(prefix=candidate_prefixed), } for space in self.spaces ] diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 64f320da..0edc81df 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -36,7 +36,6 @@ def test_model_ranker( sample: Sample, n_jobs: int, ) -> None: - expected_scores = [ 0.840, 0.837, @@ -105,7 +104,6 @@ def test_model_ranker( def test_model_ranker_no_preprocessing(n_jobs) -> None: - expected_learner_scores = [0.961, 0.957, 0.957, 0.936] # define a yield-engine circular CV: @@ -158,7 +156,6 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: def test_parameter_space( sample: Sample, simple_preprocessor: TransformerDF, n_jobs: int ) -> None: - # distributions randint_3_10 = randint(3, 10) @@ -275,13 +272,30 @@ def regressor_repr(model: Id): }, ] + assert mps.get_parameters("my_prefix") == [ + { + "my_prefix__candidate": [pipeline_1], + "my_prefix__candidate_name": [ps_1_name], + "my_prefix__candidate__regressor__max_depth": randint_3_10, + "my_prefix__candidate__regressor__min_samples_leaf": loguniform_0_05_0_10, + ( + "my_prefix__candidate__regressor__min_weight_fraction_leaf" + ): loguniform_0_01_0_10, + }, + { + "my_prefix__candidate": [pipeline_2], + "my_prefix__candidate_name": [ps_2_name], + "my_prefix__candidate__regressor__max_depth": randint_3_10, + "my_prefix__candidate__regressor__min_child_samples": zipfian_1_32, + }, + ] + def test_learner_ranker_regression( regressor_parameters: 
MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, n_jobs: int, ) -> None: - # define the circular cross validator with just 5 splits (to speed up testing) cv = BootstrapCV(n_splits=5, random_state=42) @@ -320,7 +334,6 @@ def test_learner_ranker_regression( def test_learner_ranker_classification( iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int ) -> None: - expected_learner_scores = [0.965, 0.964, 0.957, 0.956] # define parameters and crossfit From c79b53f4b08958ac9373c462d7c5d4328c994476 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 19:21:02 +0100 Subject: [PATCH 090/106] FIX: improve type hints of class CandidateEstimatorDF --- src/facet/selection/_parameters.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index e09977ba..46c0bdac 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -57,9 +57,8 @@ # Type variables # -T_Self = TypeVar("T_Self") T_Candidate_co = TypeVar("T_Candidate_co", covariant=True, bound=EstimatorDF) - +T_CandidateEstimatorDF = TypeVar("T_CandidateEstimatorDF", bound="CandidateEstimatorDF") # # Ensure all symbols introduced below are included in __all__ @@ -334,12 +333,7 @@ def to_expression(self) -> "Expression": @inheritdoc(match="""[see superclass]""") -class CandidateEstimatorDF( - ClassifierDF, - RegressorDF, - TransformerDF, - Generic[T_Candidate_co], -): +class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): """ Metaclass providing representation for candidate estimator to be used in hyperparameter search. Unifies evaluation approach for :class:`.ParameterSpace` @@ -354,10 +348,10 @@ class CandidateEstimatorDF( PARAM_CANDIDATE_NAME = "candidate_name" #: The currently selected estimator candidate - candidate: T_Candidate_co + candidate: Optional[Union[ClassifierDF, RegressorDF, TransformerDF]] #: The name of the candidate - candidate_name: str + candidate_name: Optional[str] def __init__( self, @@ -433,11 +427,11 @@ def fit_predict( # noinspection PyPep8Naming def fit( - self: T_Self, + self: T_CandidateEstimatorDF, X: pd.DataFrame, y: Optional[Union[pd.Series, pd.DataFrame]] = None, **fit_params: Any, - ) -> T_Self: + ) -> T_CandidateEstimatorDF: """[see superclass]""" self.candidate.fit(X, y, **fit_params) return self From ec7f525c87b07b5d73652391d8e895549da8a371 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 19:21:36 +0100 Subject: [PATCH 091/106] REFACTOR: tweak approach to get BaseSearchCV class --- src/facet/selection/_selection.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e2d9ef8e..e47ecc3f 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -48,11 +48,9 @@ # sklearn does not publish base class BaseSearchCV, so we pull it from the MRO # of GridSearchCV -BaseSearchCV = [ - base_class - for base_class in GridSearchCV.mro() - if base_class.__name__ == "BaseSearchCV" -][0] +BaseSearchCV = next( + filter(lambda cls: cls.__name__ == "BaseSearchCV", GridSearchCV.mro()) +) # # Type variables From 33c0989023f0aba6077fe39edb0f840f64de4077 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 19:25:38 +0100 Subject: [PATCH 092/106] DOC: update docstrings --- src/facet/selection/__init__.py | 6 ----- src/facet/selection/_parameters.py | 30 
+++++++++++++++---------- src/facet/selection/_selection.py | 26 ++++++++++----------- src/facet/selection/base/_parameters.py | 14 +++++++----- 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/src/facet/selection/__init__.py b/src/facet/selection/__init__.py index fd6ae236..23499a38 100644 --- a/src/facet/selection/__init__.py +++ b/src/facet/selection/__init__.py @@ -1,11 +1,5 @@ """ Learner selection with hyperparameter optimization. - -:class:`.LearnerGrid` encapsulates a :class:`~sklearndf.PipelineDF` and a grid of -hyperparameters. - -:class:`.LearnerRanker` selects the best pipeline and parametrization based on the -pipeline and hyperparameter choices provided as a list of :class:`.LearnerGrid`. """ from ._parameters import * from ._selection import * diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 46c0bdac..47ac79f7 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -75,15 +75,15 @@ @inheritdoc(match="""[see superclass]""") class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co]): """ - A set of parameters spanning a parameter space for optimizing the hyper-parameters - of a single estimator. + A set of parameter choices or distributions spanning a parameter space for + optimizing the hyper-parameters of a single estimator. Parameter spaces provide an easy approach to define and validate search spaces for hyper-parameter tuning of ML pipelines using `scikit-learn`'s :class:`~sklearn.model_selection.GridSearchCV` and :class:`~sklearn.model_selection.RandomizedSearchCV`. - Parameter lists or distributions to be searched can be set using attribute access, + Parameter choices or distributions to be searched can be set using attribute access, and will be validated for correct names and values. Example: @@ -95,7 +95,7 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co] regressor=RandomForestRegressorDF(random_state=42), preprocessing=simple_preprocessor, ), - candidate_name="rf_candidate" + name="random forest", ) ps.regressor.min_weight_fraction_leaf = scipy.stats.loguniform(0.01, 0.1) ps.regressor.max_depth = [3, 4, 5, 7, 10] @@ -118,7 +118,9 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co] def __init__(self, estimator: T_Candidate_co, name: Optional[str] = None) -> None: """ :param estimator: the estimator candidate to which to apply the parameters to - :param name: a name for the estimator candidate to be used in summary reports + :param name: a name for the estimator candidate to be used in summary reports; + defaults to the type of the estimator, or the type of the final estimator + if arg estimator is a pipeline """ super().__init__(estimator=estimator) @@ -301,7 +303,7 @@ def __init__(self, *spaces: ParameterSpace[T_Candidate_co]) -> None: @subsdoc( pattern=( - r"a dictionary of parameter distributions,[\n\s]*" + r"a dictionary of parameter choices and distributions,[\n\s]*" r"or a list of such dictionaries" ), replacement="a list of dictionaries of parameter distributions", @@ -335,10 +337,13 @@ def to_expression(self) -> "Expression": @inheritdoc(match="""[see superclass]""") class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): """ - Metaclass providing representation for candidate estimator to be used in - hyperparameter search. Unifies evaluation approach for :class:`.ParameterSpace` - and class:`.MultiEstimatorParameterSpace`. 
For the latter it provides "empty" - candidate where actual estimator is a hyperparameter itself. + A trivial wrapper for classifiers, regressors and transformers, acting + like a pipeline with a single step. + + Used in conjunction with :class:`MultiEstimatorParameterSpace` to evaluate multiple + competing models: the :attr:`.candidate` parameter determines the estimator to be + used and is used to include multiple estimators as part of the parameter space + that is searched during model tuning. """ #: name of the `candidate` parameter @@ -347,10 +352,11 @@ class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): #: name of the `candidate_name` parameter PARAM_CANDIDATE_NAME = "candidate_name" - #: The currently selected estimator candidate + #: The currently selected estimator candidate. candidate: Optional[Union[ClassifierDF, RegressorDF, TransformerDF]] - #: The name of the candidate + #: The name of the candidate, used for more readable summary reports + #: of model tuning results. candidate_name: Optional[str] def __init__( diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e47ecc3f..e3695078 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -87,11 +87,13 @@ class LearnerRanker( FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF, T_SearchCV] ): """ - Score and rank different parametrizations of one or more learners, - using cross-validation. + Select the best model obtained through fitting an estimator using different + choices of hyper-parameters and/or estimator types obtained from a + :class:`.ParameterSpace` or :class:`.MultiEstimatorParameterSpace`, and using + a given scoring metric to evaluate the performance of all resulting models. - The learner ranker can run a simultaneous grid search across multiple alternative - learner pipelines, supporting the ability to simultaneously select a learner + The learner ranker can run a simultaneous search across multiple alternative + estimators, supporting the ability to simultaneously select a learner algorithm and optimize hyper-parameters. """ @@ -240,7 +242,7 @@ def is_fitted(self) -> bool: @property def best_estimator_(self) -> T_LearnerPipelineDF: """ - The pipeline which obtained the best ranking score, fitted on the entire sample. + The model which obtained the best ranking score, fitted on the entire sample. """ self._ensure_fitted() searcher = self.searcher_ @@ -264,15 +266,13 @@ def fit( **fit_params: Any, ) -> T_Self: """ - Rank the candidate learners and their hyper-parameter combinations using - crossfits from the given sample. + Identify the model with the best-performing hyper-parameter combination using + the given sample. - Other than the scikit-learn implementation of grid search, arbitrary parameters - can be passed on to the learner pipeline(s) to be fitted. 
- - :param sample: the sample from which to fit the crossfits - :param groups: - :param fit_params: any fit parameters to pass on to the learner's fit method + :param sample: the sample used to fit and score the estimators + :param groups: group labels for the samples used while splitting the dataset + into train/test set; passed on to the ``fit`` method of the searcher + :param fit_params: parameters to pass on to the estimator's fit method :return: ``self`` """ self: LearnerRanker[ diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index f5a6f7ca..59013dae 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -56,7 +56,8 @@ class BaseParameterSpace(HasExpressionRepr, Generic[T_Estimator], metaclass=ABCM def __init__(self, estimator: T_Estimator) -> None: """ - :param estimator: the estimator for which to capture parameters + :param estimator: the estimator for which to specify parameter choices or + distributions """ self._estimator = estimator @@ -70,7 +71,7 @@ def estimator(self) -> T_Estimator: @property def parameters(self) -> Union[List[ParameterDict], ParameterDict]: """ - The parameter sets spanning this parameter space. + The parameter choices and distributions that constitute this parameter space. This is a shortcut for calling method :meth:`.get_parameters` with no arguments. @@ -82,14 +83,15 @@ def get_parameters( self, prefix: Optional[str] = None ) -> Union[List[ParameterDict], ParameterDict]: """ - Generate a dictionary of parameter distributions, + Generate a dictionary of parameter choices and distributions, or a list of such dictionaries, compatible with `scikit-learn`'s :class:`~sklearn.model_selection.GridSearchCV` and :class:`~sklearn.model_selection.RandomizedSearchCV`. 
- :param prefix: an optional path prefix to prepend to all paths in the resulting - dictionary - :return: a dictionary mapping paths to estimator parameters to parameter + :param prefix: an optional prefix to prepend to all parameter names in the + resulting dictionary, separated by two underscore characters (`__`) as + per scikit-learn's convention for hierarchical parameter names + :return: a dictionary mapping parameter names to parameter distributions """ pass From 2ddfebe6a7ddcd28c0e4494c933607b23a137fe9 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:21:48 +0100 Subject: [PATCH 093/106] REFACTOR: reorder constants for better code legibility --- src/facet/selection/_selection.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e3695078..5a664719 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -131,11 +131,14 @@ class LearnerRanker( (r"(rank|mean|std)_(\w+)_time", r"time__\2__\1"), (r"(rank|mean|std)_(\w+)_(\w+)", r"\3__\2__\1"), ] - - _CV_RESULT_CANDIDATE_PATTERN = re.compile( - r"^(?:(param__)candidate__|param__(candidate(?:_name)?)$)" + # noinspection PyTypeChecker + _CV_RESULT_PATTERNS: List[Tuple[Pattern, str]] = [ + (re.compile(pattern), repl) for pattern, repl in _CV_RESULT_COLUMNS + ] + _CV_RESULT_CANDIDATE_PATTERN, _CV_RESULT_CANDIDATE_REPL = ( + re.compile(r"^(?:(param__)candidate__|param__(candidate(?:_name)?)$)"), + r"\1\2", ) - _CV_RESULT_CANDIDATE_REPL = r"\1\2" # noinspection PyTypeChecker _CV_RESULT_PATTERNS: List[Tuple[Pattern, str]] = [ From 917675784b0607ddf1b6c3914af092482960a28f Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:26:19 +0100 Subject: [PATCH 094/106] API: support all estimators in LearnerRanker, not just learner pipelines --- src/facet/selection/_selection.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 5a664719..433f0bbf 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -28,11 +28,7 @@ from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin from sklearndf import EstimatorDF -from sklearndf.pipeline import ( - ClassifierPipelineDF, - LearnerPipelineDF, - RegressorPipelineDF, -) +from sklearndf.pipeline import LearnerPipelineDF from facet.data import Sample from facet.selection import CandidateEstimatorDF @@ -57,11 +53,7 @@ # T_Self = TypeVar("T_Self") -T_LearnerPipelineDF = TypeVar( - "T_LearnerPipelineDF", RegressorPipelineDF, ClassifierPipelineDF -) -T_RegressorPipelineDF = TypeVar("T_RegressorPipelineDF", bound=RegressorPipelineDF) -T_ClassifierPipelineDF = TypeVar("T_ClassifierPipelineDF", bound=ClassifierPipelineDF) +T_EstimatorDF = TypeVar("T_EstimatorDF", bound=EstimatorDF) T_SearchCV = TypeVar("T_SearchCV", bound=BaseSearchCV) # @@ -84,7 +76,7 @@ @inheritdoc(match="[see superclass]") class LearnerRanker( - FittableMixin[Sample], ParallelizableMixin, Generic[T_LearnerPipelineDF, T_SearchCV] + FittableMixin[Sample], ParallelizableMixin, Generic[T_EstimatorDF, T_SearchCV] ): """ Select the best model obtained through fitting an estimator using different @@ -243,7 +235,7 @@ def is_fitted(self) -> bool: return self.searcher_ is not None @property - def best_estimator_(self) -> T_LearnerPipelineDF: + def best_estimator_(self) -> T_EstimatorDF: """ The model which obtained the 
best ranking score, fitted on the entire sample. """ @@ -278,9 +270,6 @@ def fit( :param fit_params: parameters to pass on to the estimator's fit method :return: ``self`` """ - self: LearnerRanker[ - T_LearnerPipelineDF, T_SearchCV - ] # support type hinting in PyCharm self._reset_fit() From 0a2e99fb559f372cbb237856efeea48302d88b22 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:26:55 +0100 Subject: [PATCH 095/106] DOC: add comments --- src/facet/selection/_selection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 433f0bbf..e9ec1987 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -116,6 +116,8 @@ class LearnerRanker( #: The searcher used to fit this LearnerRanker; ``None`` if not fitted. searcher_: Optional[T_SearchCV] + # regular expressions and replacement patterns for selecting and renaming + # relevant columns from scikit-learn's cv_result_ table _CV_RESULT_COLUMNS = [ (r"rank_test_(\w+)", r"\1__test__rank"), (r"(mean|std)_test_(\w+)", r"\2__test__\1"), @@ -132,10 +134,8 @@ class LearnerRanker( r"\1\2", ) - # noinspection PyTypeChecker - _CV_RESULT_PATTERNS: List[Tuple[Pattern, str]] = [ - (re.compile(pattern), repl) for pattern, repl in _CV_RESULT_COLUMNS - ] + # Default column to sort by in the summary_report() method. + # This has no influence on how the best model is selected. _DEFAULT_REPORT_SORT_COLUMN = "rank_test_score" def __init__( From 74146919f13e2a3911ec598dfa5121e06bce83fa Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:39:06 +0100 Subject: [PATCH 096/106] DOC: update docstrings --- src/facet/selection/_parameters.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 47ac79f7..19c5e05e 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -365,10 +365,11 @@ def __init__( candidate_name: Optional[str] = None, ) -> None: """ - :param candidate: the candidate estimator. If ``None`` then estimators to be - evaluated should be provided in the parameter grid under a - "candidate" key. 
- :param candidate_name: a name for the candidate + :param candidate: the current estimator candidate; usually not specified on + class creation but set as a parameter during multi-estimator model selection + :param candidate_name: a name for the estimator candidate; usually not specified + on class creation but set as a parameter during multi-estimator model + selection """ super().__init__() From 50f0a343f980c9c2c6d8fed1af4578c0577e3cf4 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:39:55 +0100 Subject: [PATCH 097/106] API: remove CandidateEstimatorDF.empty() --- src/facet/selection/_parameters.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 19c5e05e..c9305764 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -297,7 +297,7 @@ def __init__(self, *spaces: ParameterSpace[T_Candidate_co]) -> None: if len(spaces) == 0: raise TypeError("no parameter space passed; need to pass at least one") - super().__init__(estimator=CandidateEstimatorDF.empty()) + super().__init__(estimator=CandidateEstimatorDF()) self.spaces = spaces @@ -376,15 +376,6 @@ class creation but set as a parameter during multi-estimator model selection self.candidate = candidate self.candidate_name = candidate_name - @classmethod - def empty(cls) -> "CandidateEstimatorDF": - """ - Create a new candidate estimator with no candidate set. - - :return: the new candidate estimator - """ - return cls() - @property def classes_(self) -> Sequence[Any]: """[see superclass]""" From b897d266f2999e612794653bcc190ebdcdc4cdcb Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:40:21 +0100 Subject: [PATCH 098/106] DEBUG: add message to an assert statement --- src/facet/selection/_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index c9305764..635edf23 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -50,7 +50,7 @@ ParameterDict = Dict[str, ParameterSet] rv_frozen = type(stats.uniform()) -assert rv_frozen.__name__ == "rv_frozen" +assert rv_frozen.__name__ == "rv_frozen", "type of stats.uniform() is rv_frozen" # From 380b1619d3f131443721320c2119f8cecb1bed96 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Tue, 15 Feb 2022 21:44:02 +0100 Subject: [PATCH 099/106] DOC: tweak release notes --- RELEASE_NOTES.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 41cb6b10..3499153b 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -10,16 +10,16 @@ FACET 2.0 ``facet.data`` ^^^^^^^^^^^^^^ -- API: class :class:`.RangePartitioner` adds optional arguments ``lower_bound`` and - ``upper_bound`` to method :meth:`~.RangePartitioner.fit` and no longer accepts them - in the class initializer +- API: class :class:`.RangePartitioner` supports new optional arguments ``lower_bound`` + and ``upper_bound`` in method :meth:`~.RangePartitioner.fit` and no longer accepts + them in the class initializer ``facet.inspection`` -^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^ -- API: :class:`.LearnerInspector` no longer depends on learner crossfits and instead - inspects models using a single pass of SHAP calculations, usually leading to - performance gains of up to a factor of 50 +- API: :class:`.LearnerInspector` no longer uses learner crossfits and 
instead inspects
+  models using a single pass of SHAP calculations, usually leading to performance gains
+  of up to a factor of 50
 - API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances
 - API: diagonals of feature synergy, redundancy, and association matrices are now
   ``nan`` instead of 1.0
@@ -28,7 +28,7 @@ FACET 2.0
   row and column order of :class:`.Matrix` objects returned by the corresponding
   ``feature_…_matrix`` methods of :class:`.LearnerInspector`, minimizing the distance
   between adjacent leaves
-  The old sorting behaviour of FACET 1.x can be restored using method
+  The old sorting behaviour of FACET 1 can be restored using method
   :meth:`.LinkageTree.sort_by_weight`
 
 ``facet.selection``
 ^^^^^^^^^^^^^^^^^^^
 
 - API: :class:`.LearnerRanker` completely rewritten to work with native *scikit-learn*
   searchers such as :class:`.GridSearchCV` or :class:`.RandomizedSearchCV`
 - API: new classes :class:`.ParameterSpace` and :class:`.MultiEstimatorParameterSpace` offer an
-  easier and less error-prone method of defining search spaces for hyperparameter
-  tuning.
+  easier and more robust mechanism for declaring options or distributions for
+  hyperparameter tuning
 
 ``facet.simulation``
 ^^^^^^^^^^^^^^^^^^^^
 
 - API: simulations no longer depend on learner crossfits and instead are carried out
-  as a single pass on the full dataset, using the standard error of mean predictions
-  to obtain confidence intervals that are less conservative and more realistic
+  as a single pass on the full dataset, using the *standard error of mean predictions*
+  to obtain confidence intervals that are less conservative yet more realistic
 - VIZ: minor tweaks to simulation plots and reports generated by
   :class:`.SimulationDrawer`
 
 Other
 ^^^^^
 
-- API: class ``LearnerCrossfit`` is no longer used in FACET 2.0 and has been removed
+- API: class ``LearnerCrossfit`` is no longer used in FACET 2 and has been removed
 
 
 FACET 1.2

From 9442ae2194b6cb175ba5471065cc7f7131108b42 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Tue, 15 Feb 2022 23:19:01 +0100
Subject: [PATCH 100/106] API: rename LearnerRanker to ModelSelector

---
 RELEASE_NOTES.rst                  |  4 ++--
 src/facet/selection/_selection.py  | 16 ++++++++--------
 test/test/conftest.py              | 22 +++++++++++-----------
 test/test/facet/test_inspection.py |  6 +++---
 test/test/facet/test_selection.py  | 16 ++++++++--------
 5 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index 3499153b..ea617718 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -34,7 +34,7 @@ FACET 2.0
 ``facet.selection``
 ^^^^^^^^^^^^^^^^^^^
 
-- API: :class:`.LearnerRanker` completely rewritten to work with native *scikit-learn*
+- API: :class:`.ModelSelector` completely rewritten to work with native *scikit-learn*
   searchers such as :class:`.GridSearchCV` or :class:`.RandomizedSearchCV`
 - API: new classes :class:`.ParameterSpace` and :class:`.MultiEstimatorParameterSpace` offer an
   easier and more robust mechanism for declaring options or distributions for
@@ -124,7 +124,7 @@ by the :class:`.LearnerInspector`.
   across matrices as an indication of confidence for each calculated value.
 - API: Method :meth:`.LearnerInspector.shap_plot_data` now returns SHAP values for
   the positive class of binary classifiers. 
-- API: Increase efficiency of :class:`.LearnerRanker` parallelization by adopting the +- API: Increase efficiency of :class:`.ModelSelector` parallelization by adopting the new :class:`pytools.parallelization.JobRunner` API provided by :mod:`pytools` - BUILD: add support for :mod:`shap` 0.38 and 0.39 diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index e9ec1987..371bec98 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -36,7 +36,7 @@ log = logging.getLogger(__name__) -__all__ = ["LearnerRanker"] +__all__ = ["ModelSelector"] # # Type constants @@ -52,7 +52,7 @@ # Type variables # -T_Self = TypeVar("T_Self") +T_ModelSelector = TypeVar("T_ModelSelector", bound="ModelSelector") T_EstimatorDF = TypeVar("T_EstimatorDF", bound=EstimatorDF) T_SearchCV = TypeVar("T_SearchCV", bound=BaseSearchCV) @@ -75,7 +75,7 @@ @inheritdoc(match="[see superclass]") -class LearnerRanker( +class ModelSelector( FittableMixin[Sample], ParallelizableMixin, Generic[T_EstimatorDF, T_SearchCV] ): """ @@ -113,7 +113,7 @@ class LearnerRanker( #: Additional parameters to be passed on to the searcher. searcher_params: Dict[str, Any] - #: The searcher used to fit this LearnerRanker; ``None`` if not fitted. + #: The searcher used to fit this ModelSelector; ``None`` if not fitted. searcher_: Optional[T_SearchCV] # regular expressions and replacement patterns for selecting and renaming @@ -255,11 +255,11 @@ def best_estimator_(self) -> T_EstimatorDF: ) def fit( - self: T_Self, + self: T_ModelSelector, sample: Sample, groups: Union[pd.Series, np.ndarray, Sequence, None] = None, **fit_params: Any, - ) -> T_Self: + ) -> T_ModelSelector: """ Identify the model with the best-performing hyper-parameter combination using the given sample. 
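
The rename leaves the overall selection workflow intact: declare a parameter
space, hand it to a ModelSelector together with a native scikit-learn searcher
class, and fit on a Sample. A minimal sketch, using only the calls visible in
this patch series; `my_pipeline`, `my_sample`, and the `regressor.max_depth`
parameter path are hypothetical placeholders, not part of the patches:

    from sklearn.model_selection import GridSearchCV

    from facet.selection import ModelSelector, ParameterSpace

    # declare the estimator to tune; parameter names assigned via attribute
    # access are validated against the wrapped estimator's parameters
    ps = ParameterSpace(my_pipeline)
    ps.regressor.max_depth = [3, 5, 7]

    # ModelSelector delegates the search to any scikit-learn CV searcher;
    # additional keyword arguments such as cv and scoring are passed on to it
    selector = ModelSelector(
        searcher_type=GridSearchCV,
        parameter_space=ps,
        cv=5,
        scoring="r2",
    )

    # fit and score all candidate models on the sample, then inspect results
    selector.fit(sample=my_sample)
    print(selector.summary_report())
    best_model = selector.best_estimator_
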
@@ -343,8 +343,8 @@ def _process(name: str) -> Optional[str]: if unpack_candidate: # remove the "candidate" layer in the parameter output if we're dealing # with a multi parameter space - return LearnerRanker._CV_RESULT_CANDIDATE_PATTERN.sub( - LearnerRanker._CV_RESULT_CANDIDATE_REPL, name + return ModelSelector._CV_RESULT_CANDIDATE_PATTERN.sub( + ModelSelector._CV_RESULT_CANDIDATE_REPL, name ) else: return name diff --git a/test/test/conftest.py b/test/test/conftest.py index 18c54c9a..b6d3691d 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -30,7 +30,7 @@ import facet from facet.data import Sample from facet.inspection import LearnerInspector, TreeExplainerFactory -from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace +from facet.selection import ModelSelector, MultiEstimatorParameterSpace, ParameterSpace from facet.validation import BootstrapCV, StratifiedBootstrapCV logging.basicConfig(level=logging.DEBUG) @@ -164,8 +164,8 @@ def regressor_ranker( regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, n_jobs: int, -) -> LearnerRanker[RegressorPipelineDF, GridSearchCV]: - return LearnerRanker( +) -> ModelSelector[RegressorPipelineDF, GridSearchCV]: + return ModelSelector( searcher_type=GridSearchCV, parameter_space=regressor_parameters, cv=cv_kfold, @@ -179,7 +179,7 @@ def regressor_ranker( @pytest.fixture def best_lgbm_model( - regressor_ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV], + regressor_ranker: ModelSelector[RegressorPipelineDF, GridSearchCV], sample: Sample, ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test @@ -399,7 +399,7 @@ def iris_classifier_ranker_binary( iris_sample_binary: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -410,7 +410,7 @@ def iris_classifier_ranker_multi_class( iris_sample_multi_class: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_multi_class, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @@ -419,7 +419,7 @@ def iris_classifier_ranker_multi_class( @pytest.fixture def iris_classifier_ranker_dual_target( iris_sample_binary_dual_target: Sample, cv_bootstrap: BootstrapCV, n_jobs: int -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: return fit_classifier_ranker( sample=iris_sample_binary_dual_target, cv=cv_bootstrap, n_jobs=n_jobs ) @@ -427,14 +427,14 @@ def iris_classifier_ranker_dual_target( @pytest.fixture def iris_classifier_binary( - iris_classifier_ranker_binary: LearnerRanker[ClassifierPipelineDF, GridSearchCV], + iris_classifier_ranker_binary: ModelSelector[ClassifierPipelineDF, GridSearchCV], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: return iris_classifier_ranker_binary.best_estimator_ @pytest.fixture def iris_classifier_multi_class( - iris_classifier_ranker_multi_class: LearnerRanker[ + iris_classifier_ranker_multi_class: ModelSelector[ ClassifierPipelineDF, 
GridSearchCV ], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: @@ -459,7 +459,7 @@ def iris_inspector_multi_class( def fit_classifier_ranker( sample: Sample, cv: BaseCrossValidator, n_jobs: int -) -> LearnerRanker[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: +) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: # define the parameter space parameter_space = ParameterSpace( ClassifierPipelineDF( @@ -472,7 +472,7 @@ def fit_classifier_ranker( # pipeline inspector only supports binary classification, # therefore filter the sample down to only 2 target classes - return LearnerRanker( + return ModelSelector( searcher_type=GridSearchCV, parameter_space=parameter_space, cv=cv, diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 190cd539..cbb55bfa 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -28,7 +28,7 @@ LearnerInspector, TreeExplainerFactory, ) -from facet.selection import LearnerRanker +from facet.selection import ModelSelector # noinspection PyMissingOrEmptyDocstring @@ -38,7 +38,7 @@ def test_model_inspection( - regressor_ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV], + regressor_ranker: ModelSelector[RegressorPipelineDF, GridSearchCV], best_lgbm_model: RegressorPipelineDF, preprocessed_feature_names, regressor_inspector: LearnerInspector, @@ -618,7 +618,7 @@ def test_model_inspection_classifier_interaction( def test_model_inspection_classifier_interaction_dual_target( iris_sample_binary_dual_target: Sample, - iris_classifier_ranker_dual_target: LearnerRanker[ + iris_classifier_ranker_dual_target: ModelSelector[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV ], iris_target_name, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index 0edc81df..c54fb50d 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -25,7 +25,7 @@ from ..conftest import check_ranking from facet.data import Sample -from facet.selection import LearnerRanker, MultiEstimatorParameterSpace, ParameterSpace +from facet.selection import ModelSelector, MultiEstimatorParameterSpace, ParameterSpace from facet.validation import BootstrapCV, StratifiedBootstrapCV log = logging.getLogger(__name__) @@ -73,7 +73,7 @@ def test_model_ranker( # define the circular cross validator with just 5 splits (to speed up testing) cv = BootstrapCV(n_splits=5, random_state=42) - ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV] = LearnerRanker( + ranker: ModelSelector[RegressorPipelineDF, GridSearchCV] = ModelSelector( searcher_type=GridSearchCV, parameter_space=regressor_parameters, cv=cv, @@ -124,9 +124,9 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: ) test_sample: Sample = Sample(observations=test_data, target_name="target") - model_ranker: LearnerRanker[ + model_ranker: ModelSelector[ ClassifierPipelineDF[SVCDF], GridSearchCV - ] = LearnerRanker( + ] = ModelSelector( searcher_type=GridSearchCV, parameter_space=parameter_space, cv=cv, @@ -306,9 +306,9 @@ def test_learner_ranker_regression( "of arg searcher_type, but included: param_grid" ), ): - LearnerRanker(GridSearchCV, regressor_parameters, param_grid=None) + ModelSelector(GridSearchCV, regressor_parameters, param_grid=None) - ranker: LearnerRanker[RegressorPipelineDF, GridSearchCV] = LearnerRanker( + ranker: ModelSelector[RegressorPipelineDF, GridSearchCV] = ModelSelector( GridSearchCV, regressor_parameters, scoring="r2", @@ -359,9 +359,9 @@ 
def test_learner_ranker_classification( # define an illegal grid list, mixing classification with regression MultiEstimatorParameterSpace(ps1, ps2) - model_ranker: LearnerRanker[ + model_ranker: ModelSelector[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV - ] = LearnerRanker( + ] = ModelSelector( searcher_type=GridSearchCV, parameter_space=ps1, cv=cv_stratified_bootstrap, From c406b28c4d131c63e31323c1b8246d4fe911c75a Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 16 Feb 2022 00:02:51 +0100 Subject: [PATCH 101/106] API: move CandidateEstimatorDF to facet.selection.base --- src/facet/selection/_parameters.py | 138 +---------------------- src/facet/selection/_selection.py | 3 +- src/facet/selection/base/_parameters.py | 139 +++++++++++++++++++++++- 3 files changed, 139 insertions(+), 141 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 635edf23..1bbcad29 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -13,7 +13,6 @@ Iterator, List, Optional, - Sequence, Set, Tuple, Type, @@ -21,22 +20,20 @@ Union, ) -import pandas as pd from scipy import stats from sklearn.base import BaseEstimator from pytools.api import AllTracker, inheritdoc, subsdoc, to_list, validate_element_types from pytools.expression import Expression, make_expression from pytools.expression.atomic import Id -from sklearndf import ClassifierDF, EstimatorDF, RegressorDF, TransformerDF +from sklearndf import EstimatorDF from sklearndf.pipeline import LearnerPipelineDF, PipelineDF -from .base import BaseParameterSpace +from .base import BaseParameterSpace, CandidateEstimatorDF log = logging.getLogger(__name__) __all__ = [ - "CandidateEstimatorDF", "MultiEstimatorParameterSpace", "ParameterSpace", ] @@ -58,7 +55,6 @@ # T_Candidate_co = TypeVar("T_Candidate_co", covariant=True, bound=EstimatorDF) -T_CandidateEstimatorDF = TypeVar("T_CandidateEstimatorDF", bound="CandidateEstimatorDF") # # Ensure all symbols introduced below are included in __all__ @@ -334,136 +330,6 @@ def to_expression(self) -> "Expression": return Id(type(self))(*self.spaces) -@inheritdoc(match="""[see superclass]""") -class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): - """ - A trivial wrapper for classifiers, regressors and transformers, acting - like a pipeline with a single step. - - Used in conjunction with :class:`MultiEstimatorParameterSpace` to evaluate multiple - competing models: the :attr:`.candidate` parameter determines the estimator to be - used and is used to include multiple estimators as part of the parameter space - that is searched during model tuning. - """ - - #: name of the `candidate` parameter - PARAM_CANDIDATE = "candidate" - - #: name of the `candidate_name` parameter - PARAM_CANDIDATE_NAME = "candidate_name" - - #: The currently selected estimator candidate. - candidate: Optional[Union[ClassifierDF, RegressorDF, TransformerDF]] - - #: The name of the candidate, used for more readable summary reports - #: of model tuning results. 
- candidate_name: Optional[str] - - def __init__( - self, - candidate: Optional[T_Candidate_co] = None, - candidate_name: Optional[str] = None, - ) -> None: - """ - :param candidate: the current estimator candidate; usually not specified on - class creation but set as a parameter during multi-estimator model selection - :param candidate_name: a name for the estimator candidate; usually not specified - on class creation but set as a parameter during multi-estimator model - selection - """ - super().__init__() - - self.candidate = candidate - self.candidate_name = candidate_name - - @property - def classes_(self) -> Sequence[Any]: - """[see superclass]""" - return self.candidate.classes_ - - # noinspection PyPep8Naming - def predict_proba( - self, X: pd.DataFrame, **predict_params: Any - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: - """[see superclass]""" - return self.candidate.predict_proba(X, **predict_params) - - # noinspection PyPep8Naming - def predict_log_proba( - self, X: pd.DataFrame, **predict_params: Any - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: - """[see superclass]""" - return self.candidate.predict_log_proba(X, **predict_params) - - # noinspection PyPep8Naming - def decision_function( - self, X: pd.DataFrame, **predict_params: Any - ) -> Union[pd.Series, pd.DataFrame]: - """[see superclass]""" - return self.candidate.decision_function(X, **predict_params) - - # noinspection PyPep8Naming - def score( - self, X: pd.DataFrame, y: pd.Series, sample_weight: Optional[pd.Series] = None - ) -> float: - """[see superclass]""" - return self.candidate.score(X, y, sample_weight) - - # noinspection PyPep8Naming - def predict( - self, X: pd.DataFrame, **predict_params: Any - ) -> Union[pd.Series, pd.DataFrame]: - """[see superclass]""" - return self.candidate.predic(X, **predict_params) - - # noinspection PyPep8Naming - def fit_predict( - self, X: pd.DataFrame, y: pd.Series, **fit_params: Any - ) -> Union[pd.Series, pd.DataFrame]: - """[see superclass]""" - return self.candidate.fit_predict(X, y, **fit_params) - - # noinspection PyPep8Naming - def fit( - self: T_CandidateEstimatorDF, - X: pd.DataFrame, - y: Optional[Union[pd.Series, pd.DataFrame]] = None, - **fit_params: Any, - ) -> T_CandidateEstimatorDF: - """[see superclass]""" - self.candidate.fit(X, y, **fit_params) - return self - - @property - def is_fitted(self) -> bool: - """[see superclass]""" - return self.candidate is not None and self.candidate.is_fitted - - # noinspection PyPep8Naming - def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame: - """[see superclass]""" - return self.candidate.inverse_transform(X) - - # noinspection PyPep8Naming - def transform(self, X: pd.DataFrame) -> pd.DataFrame: - """[see superclass]""" - return self.candidate.transform(X) - - @property - def _estimator_type(self) -> str: - # noinspection PyProtectedMember - return self.candidate._estimator_type - - def _get_features_in(self) -> pd.Index: - return self.candidate.feature_names_in_ - - def _get_n_outputs(self) -> int: - return self.candidate.n_outputs_ - - def _get_features_original(self) -> pd.Series: - return self.candidate.feature_names_original_ - - __tracker.validate() diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 371bec98..97ac9533 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -31,8 +31,7 @@ from sklearndf.pipeline import LearnerPipelineDF from facet.data import Sample -from facet.selection import CandidateEstimatorDF -from 
facet.selection.base import BaseParameterSpace +from facet.selection.base import BaseParameterSpace, CandidateEstimatorDF log = logging.getLogger(__name__) diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index 59013dae..0f9d3826 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -4,18 +4,20 @@ import logging from abc import ABCMeta, abstractmethod -from typing import Any, Dict, Generic, List, Optional, TypeVar, Union +from typing import Any, Dict, Generic, List, Optional, Sequence, TypeVar, Union +import pandas as pd from scipy import stats -from pytools.api import AllTracker +from pytools.api import AllTracker, inheritdoc from pytools.expression import HasExpressionRepr -from sklearndf import EstimatorDF +from sklearndf import ClassifierDF, EstimatorDF, RegressorDF, TransformerDF log = logging.getLogger(__name__) __all__ = [ "BaseParameterSpace", + "CandidateEstimatorDF", ] @@ -30,6 +32,7 @@ # Type variables # +T_CandidateEstimatorDF = TypeVar("T_CandidateEstimatorDF", bound="CandidateEstimatorDF") T_Estimator = TypeVar("T_Estimator", bound=EstimatorDF) @@ -97,4 +100,134 @@ def get_parameters( pass +@inheritdoc(match="""[see superclass]""") +class CandidateEstimatorDF(ClassifierDF, RegressorDF, TransformerDF): + """ + A trivial wrapper for classifiers, regressors and transformers, acting + like a pipeline with a single step. + + Used in conjunction with :class:`MultiEstimatorParameterSpace` to evaluate multiple + competing models: the :attr:`.candidate` parameter determines the estimator to be + used and is used to include multiple estimators as part of the parameter space + that is searched during model tuning. + """ + + #: name of the `candidate` parameter + PARAM_CANDIDATE = "candidate" + + #: name of the `candidate_name` parameter + PARAM_CANDIDATE_NAME = "candidate_name" + + #: The currently selected estimator candidate. + candidate: Optional[Union[ClassifierDF, RegressorDF, TransformerDF]] + + #: The name of the candidate, used for more readable summary reports + #: of model tuning results. 
+    candidate_name: Optional[str]
+
+    def __init__(
+        self,
+        candidate: Optional[EstimatorDF] = None,
+        candidate_name: Optional[str] = None,
+    ) -> None:
+        """
+        :param candidate: the current estimator candidate; usually not specified on
+            class creation but set as a parameter during multi-estimator model selection
+        :param candidate_name: a name for the estimator candidate; usually not specified
+            on class creation but set as a parameter during multi-estimator model
+            selection
+        """
+        super().__init__()
+
+        self.candidate = candidate
+        self.candidate_name = candidate_name
+
+    @property
+    def classes_(self) -> Sequence[Any]:
+        """[see superclass]"""
+        return self.candidate.classes_
+
+    # noinspection PyPep8Naming
+    def predict_proba(
+        self, X: pd.DataFrame, **predict_params: Any
+    ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
+        """[see superclass]"""
+        return self.candidate.predict_proba(X, **predict_params)
+
+    # noinspection PyPep8Naming
+    def predict_log_proba(
+        self, X: pd.DataFrame, **predict_params: Any
+    ) -> Union[pd.DataFrame, List[pd.DataFrame]]:
+        """[see superclass]"""
+        return self.candidate.predict_log_proba(X, **predict_params)
+
+    # noinspection PyPep8Naming
+    def decision_function(
+        self, X: pd.DataFrame, **predict_params: Any
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """[see superclass]"""
+        return self.candidate.decision_function(X, **predict_params)
+
+    # noinspection PyPep8Naming
+    def score(
+        self, X: pd.DataFrame, y: pd.Series, sample_weight: Optional[pd.Series] = None
+    ) -> float:
+        """[see superclass]"""
+        return self.candidate.score(X, y, sample_weight)
+
+    # noinspection PyPep8Naming
+    def predict(
+        self, X: pd.DataFrame, **predict_params: Any
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """[see superclass]"""
+        return self.candidate.predict(X, **predict_params)
+
+    # noinspection PyPep8Naming
+    def fit_predict(
+        self, X: pd.DataFrame, y: pd.Series, **fit_params: Any
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """[see superclass]"""
+        return self.candidate.fit_predict(X, y, **fit_params)
+
+    # noinspection PyPep8Naming
+    def fit(
+        self: T_CandidateEstimatorDF,
+        X: pd.DataFrame,
+        y: Optional[Union[pd.Series, pd.DataFrame]] = None,
+        **fit_params: Any,
+    ) -> T_CandidateEstimatorDF:
+        """[see superclass]"""
+        self.candidate.fit(X, y, **fit_params)
+        return self
+
+    @property
+    def is_fitted(self) -> bool:
+        """[see superclass]"""
+        return self.candidate is not None and self.candidate.is_fitted
+
+    # noinspection PyPep8Naming
+    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
+        """[see superclass]"""
+        return self.candidate.inverse_transform(X)
+
+    # noinspection PyPep8Naming
+    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
+        """[see superclass]"""
+        return self.candidate.transform(X)
+
+    @property
+    def _estimator_type(self) -> str:
+        # noinspection PyProtectedMember
+        return self.candidate._estimator_type
+
+    def _get_features_in(self) -> pd.Index:
+        return self.candidate.feature_names_in_
+
+    def _get_n_outputs(self) -> int:
+        return self.candidate.n_outputs_
+
+    def _get_features_original(self) -> pd.Series:
+        return self.candidate.feature_names_original_
+
+
 __tracker.validate()

From 8cb05f79bb469b89359ed98f3721b283747bc661 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 16 Feb 2022 00:04:24 +0100
Subject: [PATCH 102/106] DOC: tweak a docstring

---
 src/facet/selection/_selection.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py
index 
97ac9533..458f07ab 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -78,8 +78,8 @@ class ModelSelector( FittableMixin[Sample], ParallelizableMixin, Generic[T_EstimatorDF, T_SearchCV] ): """ - Select the best model obtained through fitting an estimator using different - choices of hyper-parameters and/or estimator types obtained from a + Select the best model obtained by fitting an estimator using different + choices of hyper-parameters and/or estimator types from a :class:`.ParameterSpace` or :class:`.MultiEstimatorParameterSpace`, and using a given scoring metric to evaluate the performance of all resulting models. From 4eb299177992fc9b9f288849a5187bf012d76771 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 16 Feb 2022 10:56:28 +0100 Subject: [PATCH 103/106] DOC: update the pydata sphinx theme to ~=0.7 --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 8e6ce2b9..ee9c663a 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: - m2r ~= 0.2 - pluggy ~= 0.13 - pre-commit ~= 2.7 - - pydata-sphinx-theme ~= 0.4.0 + - pydata-sphinx-theme ~= 0.7 - pytest ~= 5.2 - pytest-cov ~= 2.8 - pyyaml ~= 5.1 From 1a9679a78c58749e6199be05ca4c57ad35c9aa20 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 16 Feb 2022 11:51:05 +0100 Subject: [PATCH 104/106] DOC: tweak docstrings --- src/facet/selection/_parameters.py | 21 ++++++++++++--------- src/facet/selection/_selection.py | 15 ++++++--------- src/facet/selection/base/__init__.py | 2 +- src/facet/selection/base/_parameters.py | 15 ++++++++------- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/facet/selection/_parameters.py b/src/facet/selection/_parameters.py index 1bbcad29..8c4968fc 100644 --- a/src/facet/selection/_parameters.py +++ b/src/facet/selection/_parameters.py @@ -79,8 +79,8 @@ class ParameterSpace(BaseParameterSpace[T_Candidate_co], Generic[T_Candidate_co] :class:`~sklearn.model_selection.GridSearchCV` and :class:`~sklearn.model_selection.RandomizedSearchCV`. - Parameter choices or distributions to be searched can be set using attribute access, - and will be validated for correct names and values. + Parameter choices (as lists) or distributions (from :mod:`scipy.stats`) can be + set using attribute access, and will be validated for correct names and values. Example: @@ -141,11 +141,12 @@ def get_name(self) -> str: """ Get the name for this parameter space. - If no name was passed to the constructor, determine the default name as follows: + If no name was passed to the constructor, determine the `default name` + recursively as follows: - - for meta-estimators, this is the default name of the delegate estimator - - for pipelines, this is the default name of the final estimator - - for all other estimators, this is the name of the estimator's type + - for meta-estimators, this is the `default name` of the delegate estimator + - for pipelines, this is the `default name` of the final estimator + - for all other estimators, this is the name of the estimator's type :return: the name for this parameter space """ @@ -277,12 +278,14 @@ class MultiEstimatorParameterSpace( ): """ A collection of parameter spaces, each representing a competing estimator from which - select the best-performing candidate with optimal hyper-parameters. + to select the best-performing candidate with optimal hyper-parameters. 
- See :class:`.ParameterSpace` for documentation on how to set up and use parameter - spaces. + See :class:`.ParameterSpace` for details on setting up and using parameter spaces. """ + #: The parameter spaces constituting this multi-estimator parameter space. + spaces: Tuple[ParameterSpace[T_Candidate_co], ...] + def __init__(self, *spaces: ParameterSpace[T_Candidate_co]) -> None: """ :param spaces: the parameter spaces from which to select the best estimator diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index 458f07ab..6218008e 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -79,13 +79,9 @@ class ModelSelector( ): """ Select the best model obtained by fitting an estimator using different - choices of hyper-parameters and/or estimator types from a - :class:`.ParameterSpace` or :class:`.MultiEstimatorParameterSpace`, and using - a given scoring metric to evaluate the performance of all resulting models. - - The learner ranker can run a simultaneous search across multiple alternative - estimators, supporting the ability to simultaneously select a learner - algorithm and optimize hyper-parameters. + choices of hyper-parameters from a :class:`.ParameterSpace`, or even + simultaneously evaluating multiple competing estimators from a + :class:`.MultiEstimatorParameterSpace`. """ #: A cross-validation searcher class, or any other callable @@ -260,8 +256,9 @@ def fit( **fit_params: Any, ) -> T_ModelSelector: """ - Identify the model with the best-performing hyper-parameter combination using - the given sample. + Search this model selector's parameter space to identify the model with the + best-performing hyper-parameter combination, using the given sample to fit and + score the candidate estimators. :param sample: the sample used to fit and score the estimators :param groups: group labels for the samples used while splitting the dataset diff --git a/src/facet/selection/base/__init__.py b/src/facet/selection/base/__init__.py index 40af413e..3522d9f5 100644 --- a/src/facet/selection/base/__init__.py +++ b/src/facet/selection/base/__init__.py @@ -1,5 +1,5 @@ """ -Base classes for module :mod:`facet.selection`. +Base classes and supporting classes for module :mod:`facet.selection`. """ from ._parameters import * diff --git a/src/facet/selection/base/_parameters.py b/src/facet/selection/base/_parameters.py index 0f9d3826..b4f5914f 100644 --- a/src/facet/selection/base/_parameters.py +++ b/src/facet/selection/base/_parameters.py @@ -74,7 +74,8 @@ def estimator(self) -> T_Estimator: @property def parameters(self) -> Union[List[ParameterDict], ParameterDict]: """ - The parameter choices and distributions that constitute this parameter space. + The parameter choices (as lists) or distributions (from :mod:`scipy.stats`) + that constitute this parameter space. This is a shortcut for calling method :meth:`.get_parameters` with no arguments. @@ -87,15 +88,15 @@ def get_parameters( ) -> Union[List[ParameterDict], ParameterDict]: """ Generate a dictionary of parameter choices and distributions, - or a list of such dictionaries, compatible with `scikit-learn`'s - :class:`~sklearn.model_selection.GridSearchCV` and - :class:`~sklearn.model_selection.RandomizedSearchCV`. + or a list of such dictionaries, compatible with `scikit-learn`'s CV search API + (e.g., :class:`~sklearn.model_selection.GridSearchCV` or + :class:`~sklearn.model_selection.RandomizedSearchCV`). 
:param prefix: an optional prefix to prepend to all parameter names in the - resulting dictionary, separated by two underscore characters (`__`) as - per scikit-learn's convention for hierarchical parameter names + resulting dictionary, separated by two underscore characters (``__``) as + per `scikit-learn`'s convention for hierarchical parameter names :return: a dictionary mapping parameter names to parameter - distributions + choices (as lists) or distributions (from :mod:`scipy.stats`) """ pass From 514efcb6830e70cf2edf52b4185fad2d9939fb29 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Wed, 16 Feb 2022 11:55:56 +0100 Subject: [PATCH 105/106] TEST: rename _ranker to _selector --- test/test/conftest.py | 29 +++++++++++++++-------------- test/test/facet/test_inspection.py | 12 ++++++------ test/test/facet/test_selection.py | 20 ++++++++++---------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/test/test/conftest.py b/test/test/conftest.py index b6d3691d..57f38391 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -159,7 +159,7 @@ def regressor_parameters( @pytest.fixture -def regressor_ranker( +def regressor_selector( cv_kfold: KFold, regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, @@ -179,13 +179,14 @@ def regressor_ranker( @pytest.fixture def best_lgbm_model( - regressor_ranker: ModelSelector[RegressorPipelineDF, GridSearchCV], + regressor_selector, sample: Sample, ) -> RegressorPipelineDF: # we get the best model_evaluation which is a LGBM - for the sake of test # performance + # noinspection PyTypeChecker best_lgbm_params: Dict[str, Any] = ( - pd.DataFrame(regressor_ranker.searcher_.cv_results_) + pd.DataFrame(regressor_selector.searcher_.cv_results_) .pipe( lambda df: df.loc[df.loc[:, "param_candidate_name"] == "LGBMRegressorDF", :] ) @@ -395,50 +396,50 @@ def check_ranking( @pytest.fixture -def iris_classifier_ranker_binary( +def iris_classifier_selector_binary( iris_sample_binary: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, ) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: - return fit_classifier_ranker( + return fit_classifier_selector( sample=iris_sample_binary, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @pytest.fixture -def iris_classifier_ranker_multi_class( +def iris_classifier_selector_multi_class( iris_sample_multi_class: Sample, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int, ) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: - return fit_classifier_ranker( + return fit_classifier_selector( sample=iris_sample_multi_class, cv=cv_stratified_bootstrap, n_jobs=n_jobs ) @pytest.fixture -def iris_classifier_ranker_dual_target( +def iris_classifier_selector_dual_target( iris_sample_binary_dual_target: Sample, cv_bootstrap: BootstrapCV, n_jobs: int ) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: - return fit_classifier_ranker( + return fit_classifier_selector( sample=iris_sample_binary_dual_target, cv=cv_bootstrap, n_jobs=n_jobs ) @pytest.fixture def iris_classifier_binary( - iris_classifier_ranker_binary: ModelSelector[ClassifierPipelineDF, GridSearchCV], + iris_classifier_selector_binary: ModelSelector[ClassifierPipelineDF, GridSearchCV], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: - return iris_classifier_ranker_binary.best_estimator_ + return iris_classifier_selector_binary.best_estimator_ @pytest.fixture def iris_classifier_multi_class( - 
iris_classifier_ranker_multi_class: ModelSelector[ + iris_classifier_selector_multi_class: ModelSelector[ ClassifierPipelineDF, GridSearchCV ], ) -> ClassifierPipelineDF[RandomForestClassifierDF]: - return iris_classifier_ranker_multi_class.best_estimator_ + return iris_classifier_selector_multi_class.best_estimator_ @pytest.fixture @@ -457,7 +458,7 @@ def iris_inspector_multi_class( # -def fit_classifier_ranker( +def fit_classifier_selector( sample: Sample, cv: BaseCrossValidator, n_jobs: int ) -> ModelSelector[ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV]: # define the parameter space diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index cbb55bfa..76f0f795 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -38,7 +38,7 @@ def test_model_inspection( - regressor_ranker: ModelSelector[RegressorPipelineDF, GridSearchCV], + regressor_selector, best_lgbm_model: RegressorPipelineDF, preprocessed_feature_names, regressor_inspector: LearnerInspector, @@ -48,7 +48,7 @@ def test_model_inspection( n_jobs: int, ) -> None: - ranking = regressor_ranker.summary_report() + ranking = regressor_selector.summary_report() # define checksums for this test log.debug(f"\n{ranking}") @@ -100,11 +100,11 @@ def test_model_inspection( DendrogramDrawer(style="text").draw(data=linkage_tree, title="Test") -def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None: +def test_binary_classifier_ranking(iris_classifier_selector_binary) -> None: expected_learner_scores = [0.938, 0.936, 0.936, 0.929] - ranking = iris_classifier_ranker_binary.summary_report() + ranking = iris_classifier_selector_binary.summary_report() log.debug(f"\n{ranking}") @@ -618,13 +618,13 @@ def test_model_inspection_classifier_interaction( def test_model_inspection_classifier_interaction_dual_target( iris_sample_binary_dual_target: Sample, - iris_classifier_ranker_dual_target: ModelSelector[ + iris_classifier_selector_dual_target: ModelSelector[ ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV ], iris_target_name, n_jobs: int, ) -> None: - iris_classifier_dual_target = iris_classifier_ranker_dual_target.best_estimator_ + iris_classifier_dual_target = iris_classifier_selector_dual_target.best_estimator_ with pytest.raises( ValueError, diff --git a/test/test/facet/test_selection.py b/test/test/facet/test_selection.py index c54fb50d..a62a8b7c 100644 --- a/test/test/facet/test_selection.py +++ b/test/test/facet/test_selection.py @@ -31,7 +31,7 @@ log = logging.getLogger(__name__) -def test_model_ranker( +def test_model_selector( regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF], sample: Sample, n_jobs: int, @@ -103,7 +103,7 @@ def test_model_ranker( ) -def test_model_ranker_no_preprocessing(n_jobs) -> None: +def test_model_selector_no_preprocessing(n_jobs) -> None: expected_learner_scores = [0.961, 0.957, 0.957, 0.936] # define a yield-engine circular CV: @@ -124,7 +124,7 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: ) test_sample: Sample = Sample(observations=test_data, target_name="target") - model_ranker: ModelSelector[ + model_selector: ModelSelector[ ClassifierPipelineDF[SVCDF], GridSearchCV ] = ModelSelector( searcher_type=GridSearchCV, @@ -135,7 +135,7 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None: sample=test_sample ) - summary_report = model_ranker.summary_report() + summary_report = model_selector.summary_report() log.debug(f"\n{summary_report}") check_ranking( @@ -291,7 +291,7 
+291,7 @@ def regressor_repr(model: Id):
 ]
 
 
-def test_learner_ranker_regression(
+def test_model_selector_regression(
     regressor_parameters: MultiEstimatorParameterSpace[RegressorPipelineDF],
     sample: Sample,
     n_jobs: int,
@@ -331,7 +331,7 @@
     )
 
 
-def test_learner_ranker_classification(
+def test_model_selector_classification(
     iris_sample_multi_class, cv_stratified_bootstrap: StratifiedBootstrapCV, n_jobs: int
 ) -> None:
     expected_learner_scores = [0.965, 0.964, 0.957, 0.956]
@@ -359,7 +359,7 @@
     # define an illegal grid list, mixing classification with regression
     MultiEstimatorParameterSpace(ps1, ps2)
 
-    model_ranker: ModelSelector[
+    model_selector: ModelSelector[
        ClassifierPipelineDF[RandomForestClassifierDF], GridSearchCV
     ] = ModelSelector(
         searcher_type=GridSearchCV,
@@ -373,13 +373,13 @@
         ValueError,
         match="arg sample_weight is not supported, use arg sample.weight instead",
     ):
-        model_ranker.fit(
+        model_selector.fit(
            sample=iris_sample_multi_class, sample_weight=iris_sample_multi_class.weight
        )
 
-    model_ranker.fit(sample=iris_sample_multi_class)
+    model_selector.fit(sample=iris_sample_multi_class)
 
-    ranking = model_ranker.summary_report()
+    ranking = model_selector.summary_report()
 
     log.debug(f"\n{ranking}")
 

From 4c81321c819a07b1f15680d09bee8be2e4755df0 Mon Sep 17 00:00:00 2001
From: Jan Ittner
Date: Wed, 16 Feb 2022 11:59:51 +0100
Subject: [PATCH 106/106] DOC: tweak release notes

---
 RELEASE_NOTES.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index ea617718..5376ac47 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -34,8 +34,10 @@ FACET 2.0
 ``facet.selection``
 ^^^^^^^^^^^^^^^^^^^
 
-- API: :class:`.ModelSelector` completely rewritten to work with native *scikit-learn*
-  searchers such as :class:`.GridSearchCV` or :class:`.RandomizedSearchCV`
+- API: :class:`.ModelSelector` replaces FACET 1 class ``LearnerRanker``, and now
+  supports any CV searcher that implements `scikit-learn`'s CV search API, including
+  `scikit-learn`'s native searchers such as :class:`.GridSearchCV` or
+  :class:`.RandomizedSearchCV`
 - API: new classes :class:`.ParameterSpace` and :class:`.MultiEstimatorParameterSpace` offer an
   easier and more robust mechanism for declaring options or distributions for
   hyperparameter tuning
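
The release notes above refer to declaring parameter options or distributions
across competing estimators. A hedged sketch of how such a multi-estimator
space might be declared, assuming the ParameterSpace and
MultiEstimatorParameterSpace APIs shown in these patches; the estimator
classes, parameter names, and the `n_iter` argument (forwarded to the
searcher) are illustrative assumptions, not confirmed usage:

    from scipy import stats
    from sklearn.model_selection import RandomizedSearchCV
    from sklearndf.pipeline import RegressorPipelineDF
    from sklearndf.regression import RandomForestRegressorDF, RidgeDF

    from facet.selection import (
        ModelSelector,
        MultiEstimatorParameterSpace,
        ParameterSpace,
    )

    # one parameter space per competing estimator; parameters may be declared
    # as lists of choices or as scipy.stats distributions
    ps_rf = ParameterSpace(RegressorPipelineDF(regressor=RandomForestRegressorDF()))
    ps_rf.regressor.n_estimators = stats.randint(50, 500)

    ps_ridge = ParameterSpace(RegressorPipelineDF(regressor=RidgeDF()))
    ps_ridge.regressor.alpha = stats.loguniform(1e-3, 1e1)

    # combining the spaces lets a single search select both the estimator
    # and its hyper-parameters
    mps = MultiEstimatorParameterSpace(ps_rf, ps_ridge)

    selector = ModelSelector(
        searcher_type=RandomizedSearchCV,
        parameter_space=mps,
        n_iter=20,  # additional keyword arguments are passed to the searcher
        cv=5,
    )
    # selector.fit(sample=...) would then fit, score, and rank all candidates
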