generate LearnerRanker summary reports as data frames, not text #95

Merged: 9 commits, Oct 12, 2020
144 changes: 101 additions & 43 deletions src/facet/selection/_selection.py
@@ -19,19 +19,26 @@
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
cast,
)

import numpy as np
import pandas as pd
from numpy.random.mtrand import RandomState
from scipy.stats import sem
from sklearn.model_selection import BaseCrossValidator

from pytools.api import AllTracker, inheritdoc, to_tuple
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
from sklearndf.pipeline import (
ClassifierPipelineDF,
LearnerPipelineDF,
RegressorPipelineDF,
)

from facet import Sample
from facet.crossfit import LearnerCrossfit
@@ -63,7 +70,7 @@
#


class LearnerGrid(Sequence[Dict[str, Any]], Generic[T_LearnerPipelineDF]):
class LearnerGrid(Generic[T_LearnerPipelineDF]):
"""
A grid of hyper-parameters for tuning a learner pipeline.

@@ -185,12 +192,13 @@ class LearnerEvaluation(Generic[T_LearnerPipelineDF]):
generated by a :class:`.LearnerRanker`.
"""

__slots__ = ["pipeline", "parameters", "scores", "ranking_score"]
__slots__ = ["pipeline", "parameters", "scoring_name", "scores", "ranking_score"]

def __init__(
self,
pipeline: T_LearnerPipelineDF,
parameters: Mapping[str, Any],
scoring_name: str,
scores: np.ndarray,
ranking_score: float,
) -> None:
@@ -210,6 +218,9 @@ def __init__(
#: the hyper-parameters for which the learner pipeline was scored
self.parameters = parameters

#: the name of the scoring function used to calculate the scores
self.scoring_name = scoring_name

#: the scores of all crossfits of the learner pipeline
self.scores = scores

@@ -251,13 +262,15 @@ def __init__(
(either a single grid, or an iterable of multiple grids)
:param cv: a cross validator (e.g., \
:class:`.BootstrapCV`)
:param scoring: a scoring function (by name or a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here)
:param scoring: a scoring function (by name, or as a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here). \
If passing a callable, ``"score"`` will be used as the name of the \
scoring function, unless the callable defines a ``__name__`` attribute.
:param ranking_scorer: a function to calculate a scalar score for every \
crossfit, taking a :class:`.CrossfitScores` and returning a float. \
The resulting score is used to rank all crossfits (highest score is best). \
Defaults to :meth:`.default_ranking_scorer`, calculating \
`mean(scores) - 2 * std(scores)`.
`mean(scores) - 2 * std(scores, ddof=1)`.
:param shuffle_features: if ``True``, shuffle column order of features for \
every crossfit (default: ``False``)
:param random_state: optional random seed or random state for shuffling the \
@@ -276,9 +289,16 @@ def __init__(
f"but a {type(scoring).__name__} was given as arg scoring"
)

self.grids: Tuple[LearnerGrid, ...] = to_tuple(
grids_tuple: Tuple[LearnerGrid, ...] = to_tuple(
grids, element_type=LearnerGrid, arg_name="grids"
)
if len(grids_tuple) == 0:
raise ValueError("arg grids must specify at least one LearnerGrid")
learner_type = _learner_type(grids_tuple[0].pipeline)
if not all(isinstance(grid.pipeline, learner_type) for grid in grids_tuple[1:]):
raise ValueError("arg grids mixes regressor and classifier pipelines")

self.grids = grids_tuple
self.cv = cv
self.scoring = scoring
self.ranking_scorer = (
@@ -301,13 +321,13 @@ def default_ranking_scorer(scores: np.ndarray) -> float:
"""
The default function used to rank pipelines.

Calculates `mean(scores) - 2 * std(scores)`, i.e., ranks pipelines by a
Calculates `mean(scores) - 2 * std(scores, ddof=1)`, i.e., ranks pipelines by a
(pessimistic) lower bound of the expected score.

:param scores: the scores for all crossfits
:return: scalar score for ranking the pipeline
"""
return scores.mean() - 2 * scores.std()
return scores.mean() - 2 * scores.std(ddof=1)

def fit(self: T, sample: Sample, **fit_params) -> T:
"""
@@ -339,12 +359,35 @@ def is_fitted(self) -> bool:
"""[see superclass]"""
return self._ranking is not None

@property
def scoring_name(self) -> str:
"""
The name of the scoring function used to rank the learners.
"""
scoring = self.scoring
if isinstance(scoring, str):
return scoring
elif callable(scoring):
try:
return scoring.__name__
except AttributeError:
return "score"
else:
learner_type = _learner_type(self.grids[0].pipeline)
if learner_type is RegressorPipelineDF:
return "r2_score"
elif learner_type is ClassifierPipelineDF:
return "accuracy_score"
else:
# default case - we should not end up here, but handle it for forward
# compatibility
return "score"

@property
def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
"""
A list of :class:`.LearnerEvaluation` for all learners evaluated
by this ranker, \
in descending order of the ranking score.
by this ranker, in descending order of the ranking score.
"""
self._ensure_fitted()
return self._ranking
@@ -365,46 +408,42 @@ def best_model_crossfit(self) -> LearnerCrossfit[T_LearnerPipelineDF]:
self._ensure_fitted()
return self._best_crossfit

def summary_report(self, max_learners: Optional[int] = None) -> str:
def summary_report(self) -> pd.DataFrame:
"""
A human-readable report of the learner evaluations, sorted by ranking score in
descending order.
Create a summary table of the scores achieved by all learners in the grid
search, sorted by ranking score in descending order.

:param max_learners: maximum number of learners to include in the report \
(optional)

:return: a multi-line string with a summary of the pipeline ranking
:return: the summary report of the grid search as a data frame
"""

self._ensure_fitted()

def _model_name(evaluation: LearnerEvaluation) -> str:
return type(evaluation.pipeline.final_estimator).__name__

def _parameters(params: Mapping[str, Iterable[Any]]) -> str:
return ",".join(
[
f"{param_name}={param_value}"
for param_name, param_value in params.items()
]
)

ranking = self._ranking[:max_learners] if max_learners else self._ranking

name_width = max(len(_model_name(ranked_model)) for ranked_model in ranking)
scoring_name = self.scoring_name
scores_mean_name = f"{scoring_name}_mean"
scores_std_name = f"{scoring_name}_std"
scores_sem_name = f"{scoring_name}_sem"

return "\n".join(
return pd.DataFrame.from_records(
[
f"Rank {rank + 1:2d}: "
f"{_model_name(evaluation):>{name_width}s}, "
f"ranking_score={evaluation.ranking_score:9.3g}, "
f"scores_mean={evaluation.scores.mean():9.3g}, "
f"scores_std={evaluation.scores.std():9.3g}, "
f"parameters={{{_parameters(evaluation.parameters)}}}"
"\n"
for rank, evaluation in enumerate(ranking)
]
)
dict(
type=type(evaluation.pipeline.final_estimator).__name__,
ranking_score=evaluation.ranking_score,
**{
scores_mean_name: evaluation.scores.mean(),
scores_std_name: evaluation.scores.std(ddof=1),
scores_sem_name: sem(evaluation.scores, ddof=1),
**evaluation.parameters,
},
)
for evaluation in (
sorted(
self._ranking,
key=lambda evaluation: evaluation.ranking_score,
reverse=True,
)
)
],
).rename_axis(index="rank")

def _rank_learners(
self, sample: Sample, **fit_params
@@ -426,6 +465,8 @@ def _rank_learners(
best_score: float = -math.inf
best_crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None

scoring_name = self.scoring_name

for pipeline, parameters in configurations:
crossfit = LearnerCrossfit(
pipeline=pipeline,
@@ -448,6 +489,7 @@
LearnerEvaluation(
pipeline=pipeline,
parameters=parameters,
scoring_name=scoring_name,
scores=pipeline_scoring,
ranking_score=ranking_score,
)
@@ -461,4 +503,20 @@
return ranking


def _learner_type(
pipeline: T_LearnerPipelineDF,
) -> Type[Union[RegressorPipelineDF, ClassifierPipelineDF]]:
# determine whether a learner pipeline fits a regressor or a classifier
for learner_type in [RegressorPipelineDF, ClassifierPipelineDF]:
if isinstance(pipeline, learner_type):
return learner_type
if isinstance(pipeline, LearnerPipelineDF):
raise TypeError(f"unknown learner pipeline type: {type(learner_type).__name__}")
else:
raise TypeError(
"attribute grid.pipeline is not a learner pipeline: "
f"{type(learner_type).__name__}"
)


__tracker.validate()
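
For illustration, here is a minimal, self-contained sketch of what the new summary_report output looks like, mirroring the logic above with plain numpy, pandas and scipy. It is not part of this PR; the scores, learner type, scoring name ("r2") and hyper-parameter values below are invented for illustration only.

import numpy as np
import pandas as pd
from scipy.stats import sem

# hypothetical cross-validation scores of a single evaluated pipeline
scores = np.array([0.81, 0.79, 0.84, 0.80, 0.78])

# default ranking score, as implemented above: mean(scores) - 2 * std(scores, ddof=1)
ranking_score = scores.mean() - 2 * scores.std(ddof=1)

# summary_report builds one record per evaluation; score columns are prefixed
# with the scoring function's name, and each hyper-parameter becomes a column
scoring_name = "r2"
record = dict(
    type="RandomForestRegressorDF",
    ranking_score=ranking_score,
    **{
        f"{scoring_name}_mean": scores.mean(),
        f"{scoring_name}_std": scores.std(ddof=1),
        f"{scoring_name}_sem": sem(scores, ddof=1),
    },
    min_samples_leaf=16,
    n_estimators=50,
)

report = pd.DataFrame.from_records([record]).rename_axis(index="rank")
print(report)

Returning a data frame instead of pre-formatted text lets callers sort, filter, round, and export the ranking with standard pandas tooling, which is the motivation for this change.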
3 changes: 3 additions & 0 deletions test/test/conftest.py
@@ -40,6 +40,9 @@
# disable SHAP debugging messages
logging.getLogger("shap").setLevel(logging.WARNING)

# configure pandas text output
pd.set_option("display.width", None) # get display width from terminal
pd.set_option("precision", 3) # 3 digits precision for easier readability

K_FOLDS = 5
N_BOOTSTRAPS = 30
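A side note on the pandas options added above: the same settings can be spelled with fully qualified option names, which avoid relying on the short "precision" alias. A minimal sketch, assuming only pandas itself:

import pandas as pd

# same settings as in conftest.py, with fully qualified option names
pd.set_option("display.width", None)    # auto-detect terminal width
pd.set_option("display.precision", 3)   # print floats with 3 digits of precision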
26 changes: 23 additions & 3 deletions test/test/facet/test_crossfit.py
@@ -3,7 +3,8 @@
import pytest

from sklearndf.classification import RandomForestClassifierDF
from sklearndf.pipeline import ClassifierPipelineDF
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
from sklearndf.regression import RandomForestRegressorDF

from . import check_ranking
from facet import Sample
@@ -22,11 +23,30 @@ def test_prediction_classifier(
# define parameters and crossfit
grids = LearnerGrid(
pipeline=ClassifierPipelineDF(
classifier=RandomForestClassifierDF(random_state=42), preprocessing=None
classifier=RandomForestClassifierDF(random_state=42)
),
learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]},
)

# define an illegal grid list, mixing classification with regression
grids_illegal = [
grids,
LearnerGrid(
pipeline=RegressorPipelineDF(
regressor=RandomForestRegressorDF(random_state=42)
),
learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]},
),
]

with pytest.raises(
ValueError, match="^arg grids mixes regressor and classifier pipelines$"
):
LearnerRanker(
grids=grids_illegal,
cv=cv_stratified_bootstrap,
)

model_ranker: LearnerRanker[
ClassifierPipelineDF[RandomForestClassifierDF]
] = LearnerRanker(
Expand All @@ -44,7 +64,7 @@ def test_prediction_classifier(
):
model_ranker.fit(sample=iris_sample, sample_weight=iris_sample.weight)

log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{model_ranker.summary_report()}")

check_ranking(
ranking=model_ranker.ranking,
4 changes: 2 additions & 2 deletions test/test/facet/test_inspection.py
@@ -100,7 +100,7 @@ def test_model_inspection(
-0.074,
]

log.debug(f"\n{regressor_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{regressor_ranker.summary_report()}")

check_ranking(
ranking=regressor_ranker.ranking,
@@ -168,7 +168,7 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None:

expected_learner_scores = [0.872, 0.868, 0.866, 0.859]

log.debug(f"\n{iris_classifier_ranker_binary.summary_report(max_learners=10)}")
log.debug(f"\n{iris_classifier_ranker_binary.summary_report()}")
check_ranking(
ranking=iris_classifier_ranker_binary.ranking,
expected_scores=expected_learner_scores,
4 changes: 2 additions & 2 deletions test/test/facet/test_selection.py
@@ -107,7 +107,7 @@ def test_model_ranker(
grids=regressor_grids, cv=cv, scoring="r2", n_jobs=n_jobs
).fit(sample=sample)

log.debug(f"\n{ranker.summary_report(max_learners=10)}")
log.debug(f"\n{ranker.summary_report()}")

assert isinstance(ranker.best_model_crossfit, LearnerCrossfit)

@@ -168,7 +168,7 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None:
grids=models, cv=cv, n_jobs=n_jobs
).fit(sample=test_sample)

log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{model_ranker.summary_report()}")

check_ranking(
ranking=model_ranker.ranking,