Commit

Make it easier to access cross-validation results for individual model calculations (#67)

* Eliminate class CrossfitScores; replace all uses with np.ndarray
* validate that only a single scoring function is passed to LearnerRanker
* rename LearnerScores to LearnerEvaluation
* don't copy the ranking list when returning it in property LearnerRanker.ranking
j-ittner authored Sep 22, 2020
1 parent 2547bae commit 37ada27
Showing 7 changed files with 52 additions and 93 deletions.
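
Taken together, the changes in this commit mean that a LearnerRanker now accepts only a single scoring function, exposes its results through the `ranking` property instead of a `ranking()` method, and reports cross-validation scores as plain 1d numpy arrays. A minimal usage sketch of the post-commit API follows; the pre-existing `sample` and `pipeline` objects and the keyword names for LearnerGrid and BootstrapCV are assumptions for illustration, while the LearnerRanker arguments, the `ranking` property, and LearnerEvaluation are taken from this diff.

    # Sketch only: assumes `sample` (a facet Sample) and `pipeline` (an unfitted
    # RegressorPipelineDF) already exist; keyword names for LearnerGrid and
    # BootstrapCV are assumptions, not shown in this diff.
    from facet.selection import LearnerGrid, LearnerRanker
    from facet.validation import BootstrapCV

    grid = LearnerGrid(
        pipeline=pipeline,
        learner_parameters={"n_estimators": [20, 50], "max_depth": [3, 5]},
    )

    ranker = LearnerRanker(
        grids=grid,
        cv=BootstrapCV(n_splits=20, random_state=42),
        scoring="r2",  # must now be a single scorer name or callable
        n_jobs=-3,
    )
    ranker.fit(sample=sample)

    # `ranking` is now a property (no parentheses) returning LearnerEvaluation
    # objects sorted best-first; the list is no longer a defensive copy.
    for evaluation in ranker.ranking:
        # .ranking_score and .pipeline appear in this diff; .scores is assumed
        # to mirror the LearnerEvaluation constructor argument
        print(evaluation.ranking_score, evaluation.scores.mean())
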
56 changes: 7 additions & 49 deletions src/facet/crossfit/_crossfit.py
@@ -27,7 +27,7 @@

log = logging.getLogger(__name__)

__all__ = ["CrossfitScores", "LearnerCrossfit", "Scorer"]
__all__ = ["LearnerCrossfit", "Scorer"]

#
# Type variables
@@ -70,46 +70,6 @@
#


class CrossfitScores:
""""
Distribution of scores across all cross-validation fits `(crossfits)` of a
learner pipeline.
Generated by method :meth:`.LearnerCrossfit.score`.
Scores for individual fits can be accessed by iteration, or by indexing
(``[…]`` notation).
Supports :func:`.len`, returning the number of fits in this crossfit.
:param scores: list or 1d array of scores for all crossfits of a pipeline
"""

def __init__(self, scores: Union[Sequence[float], np.ndarray]):
if isinstance(scores, list):
scores = np.array(scores)

if (
not isinstance(scores, np.ndarray)
or scores.dtype != float
or scores.ndim != 1
):
raise TypeError("arg scores must be a list or 1d numpy array of floats")

self._scores = np.array(scores)

def __getitem__(self, item: Union[int, slice]) -> Union[float, np.ndarray]:
return self._scores[item]

def mean(self) -> float:
""":return: the mean score"""
return self._scores.mean()

def std(self) -> float:
""":return: the standard deviation of the scores"""
return self._scores.std()


class _FitScoreParameters(NamedTuple):
pipeline: T_LearnerPipelineDF

@@ -219,7 +179,7 @@ def score(
self,
scoring: Union[str, Callable[[float, float], float], None] = None,
train_scores: bool = False,
) -> CrossfitScores:
) -> np.ndarray:
"""
Score all models in this crossfit using the given scoring function.
@@ -231,7 +191,7 @@ def score(
function as keyword argument ``sample_weight``
:param train_scores: if ``True``, calculate train scores instead of test \
scores (default: ``False``)
:return: the resulting scores
:return: the resulting scores as a 1d numpy array
"""

return self._fit_score(_scoring=scoring, _train_scores=train_scores)
@@ -242,7 +202,7 @@ def fit_score(
scoring: Union[str, Callable[[float, float], float], None] = None,
train_scores: bool = False,
**fit_params,
) -> CrossfitScores:
) -> np.ndarray:
"""
Fit then score this crossfit.
@@ -276,7 +236,7 @@ def _fit_score(
_train_scores: bool = False,
sample_weight: pd.Series = None,
**fit_params,
) -> Optional[CrossfitScores]:
) -> Optional[np.ndarray]:

if sample_weight is not None:
raise ValueError(
@@ -384,16 +344,14 @@ def _generate_parameters() -> Iterator[_FitScoreParameters]:
for parameters in _generate_parameters()
)

model_by_split, scores = (
list(items) for items in zip(*model_and_score_by_split)
)
model_by_split, scores = zip(*model_and_score_by_split)

if do_fit:
self._splits = splits
self._model_by_split = model_by_split
self._sample = _sample

return CrossfitScores(scores=scores) if do_score else None
return np.array(scores) if do_score else None

def resize(self: T, n_fits: int) -> T:
"""
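
With CrossfitScores removed, everything the wrapper offered (indexing, slicing, `len`, `mean`, and `std`) is native behaviour of the 1d numpy array that `score` and `fit_score` now return, so callers lose no functionality by dropping it. A runnable illustration with made-up scores:

    import numpy as np

    # Stand-in for what LearnerCrossfit.score() / .fit_score() now return:
    # one score per cross-validation fit (values invented for illustration).
    scores = np.array([0.81, 0.79, 0.84, 0.80, 0.82])

    print(len(scores))    # number of fits, formerly len(crossfit_scores)
    print(scores[0])      # score of an individual fit, formerly crossfit_scores[0]
    print(scores[1:3])    # slicing still works and yields an ndarray
    print(scores.mean())  # formerly CrossfitScores.mean()
    print(scores.std())   # formerly CrossfitScores.std()
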
57 changes: 29 additions & 28 deletions src/facet/selection/_selection.py
@@ -10,19 +10,20 @@
from types import MappingProxyType
from typing import *

import numpy as np
from numpy.random.mtrand import RandomState
from sklearn.model_selection import BaseCrossValidator

from facet import Sample
from facet.crossfit import CrossfitScores, LearnerCrossfit
from facet.crossfit import LearnerCrossfit
from pytools.api import AllTracker, inheritdoc, to_tuple
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF

log = logging.getLogger(__name__)

__all__ = ["LearnerGrid", "LearnerScores", "LearnerRanker"]
__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"]

#
# Type variables
@@ -163,7 +164,7 @@ def __len__(self) -> int:
)


class LearnerScores(Generic[T_LearnerPipelineDF]):
class LearnerEvaluation(Generic[T_LearnerPipelineDF]):
"""
A collection of scores for a specific parametrisation of a learner pipeline,
generated by a :class:`.LearnerRanker`.
@@ -175,15 +176,15 @@ def __init__(
self,
pipeline: T_LearnerPipelineDF,
parameters: Mapping[str, Any],
scores: CrossfitScores,
scores: np.ndarray,
ranking_score: float,
) -> None:
"""
:param pipeline: the unfitted learner pipeline
:param parameters: the hyper-parameters for which the learner pipeline was \
scored, as a mapping of parameter names to parameter values
:param scores: the scores of all crossfits of the learner pipeline
:param ranking_score: overall score determined by the ranking \
:param ranking_score: the aggregate score determined by the ranking \
metric of the :class:`.LearnerRanker`, used for ranking the learners
"""
super().__init__()
@@ -222,15 +223,8 @@ def __init__(
LearnerGrid[T_LearnerPipelineDF], Iterable[LearnerGrid[T_LearnerPipelineDF]]
],
cv: Optional[BaseCrossValidator],
scoring: Union[
str,
Callable[[float, float], float],
List[str],
Tuple[str],
Dict[str, Callable[[float, float], float]],
None,
] = None,
ranking_scorer: Callable[[CrossfitScores], float] = None,
scoring: Union[str, Callable[[float, float], float], None] = None,
ranking_scorer: Callable[[np.ndarray], float] = None,
shuffle_features: Optional[bool] = None,
random_state: Union[int, RandomState, None] = None,
n_jobs: Optional[int] = None,
@@ -243,8 +237,8 @@ def __init__(
(either a single grid, or an iterable of multiple grids)
:param cv: a cross validator (e.g., \
:class:`.BootstrapCV`)
:param scoring: a scorer to use when doing CV within GridSearch, defaults to \
``None``
:param scoring: a scoring function (by name or a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here)
:param ranking_scorer: a function to calculate a scalar score for every \
crossfit, taking a :class:`.CrossfitScores` and returning a float. \
The resulting score is used to rank all crossfits (highest score is best). \
@@ -262,6 +256,12 @@ def __init__(
verbose=verbose,
)

if scoring is not None and not (isinstance(scoring, str) or callable(scoring)):
raise TypeError(
"only a single scoring function is currently supported, "
f"but a {type(scoring).__name__} was given as arg scoring"
)

self.grids: Tuple[LearnerGrid, ...] = to_tuple(
grids, element_type=LearnerGrid, arg_name="grids"
)
Expand All @@ -276,14 +276,14 @@ def __init__(
self.random_state = random_state

# initialise state
self._ranking: Optional[List[LearnerScores]] = None
self._ranking: Optional[List[LearnerEvaluation]] = None
self._best_model: Optional[T_LearnerPipelineDF] = None

# add parameter documentation of ParallelizableMixin
__init__.__doc__ += ParallelizableMixin.__init__.__doc__

@staticmethod
def default_ranking_scorer(scores: CrossfitScores) -> float:
def default_ranking_scorer(scores: np.ndarray) -> float:
"""
The default function used to rank pipelines.
@@ -308,7 +308,7 @@ def fit(self: T, sample: Sample, **fit_params) -> T:
"""
self: LearnerRanker[T_LearnerPipelineDF] # support type hinting in PyCharm

ranking: List[LearnerScores[T_LearnerPipelineDF]] = self._rank_learners(
ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = self._rank_learners(
sample=sample, **fit_params
)
ranking.sort(key=lambda le: le.ranking_score, reverse=True)
@@ -325,13 +325,14 @@ def is_fitted(self) -> bool:
"""[see superclass]"""
return self._ranking is not None

def ranking(self) -> List[LearnerScores[T_LearnerPipelineDF]]:
@property
def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
"""
:return a ranking of all learners that were evaluated by this ranker,
in descending order of the ranking score.
A list of :class:`.LearnerEvaluation` for all learners evaluated by this ranker, \
in descending order of the ranking score.
"""
self._ensure_fitted()
return self._ranking.copy()
return self._ranking

@property
def best_model(self) -> T_LearnerPipelineDF:
@@ -362,7 +363,7 @@ def summary_report(self, max_learners: Optional[int] = None) -> str:

self._ensure_fitted()

def _model_name(evaluation: LearnerScores) -> str:
def _model_name(evaluation: LearnerEvaluation) -> str:
return type(evaluation.pipeline.final_estimator).__name__

def _parameters(params: Mapping[str, Iterable[Any]]) -> str:
@@ -392,7 +393,7 @@ def _parameters(params: Mapping[str, Iterable[Any]]) -> str:

def _rank_learners(
self, sample: Sample, **fit_params
) -> List[LearnerScores[T_LearnerPipelineDF]]:
) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
ranking_scorer = self.ranking_scorer

configurations: Iterable[Tuple[T_LearnerPipelineDF, Dict[str, Any]]] = (
@@ -406,7 +407,7 @@ def _rank_learners(
for parameters in grid
)

ranking: List[LearnerScores[T_LearnerPipelineDF]] = []
ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = []
best_score: float = -math.inf
best_crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None

@@ -422,14 +423,14 @@ def _rank_learners(
verbose=self.verbose,
)

pipeline_scoring: CrossfitScores = crossfit.fit_score(
pipeline_scoring: np.ndarray = crossfit.fit_score(
sample=sample, scoring=self.scoring, **fit_params
)

ranking_score = ranking_scorer(pipeline_scoring)

ranking.append(
LearnerScores(
LearnerEvaluation(
pipeline=pipeline,
parameters=parameters,
scores=pipeline_scoring,
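
Because `ranking_scorer` now takes a plain numpy array, a custom ranking function is simply any float-valued function of that array. The sketch below shows one plausible choice that rewards a high mean score while penalizing variance across fits; it is an illustration, not the body of `default_ranking_scorer`, which this diff does not show. The trailing comment spells out the effect of the new single-scorer validation.

    import numpy as np

    def conservative_ranking_scorer(scores: np.ndarray) -> float:
        """Illustrative ranking metric: mean score penalized by instability."""
        return float(scores.mean() - 2.0 * scores.std())

    print(conservative_ranking_scorer(np.array([0.81, 0.79, 0.84, 0.80, 0.82])))

    # Would be passed to the ranker in place of the default, e.g.:
    #   LearnerRanker(grids=..., cv=..., scoring="r2",
    #                 ranking_scorer=conservative_ranking_scorer)
    #
    # Note the new guard in __init__: a multi-metric argument such as
    #   scoring=["r2", "neg_mean_squared_error"]
    # now fails fast with
    #   TypeError: only a single scoring function is currently supported,
    #   but a list was given as arg scoring
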
6 changes: 3 additions & 3 deletions test/test/conftest.py
@@ -13,7 +13,7 @@
from facet import Sample
from facet.crossfit import LearnerCrossfit
from facet.inspection import LearnerInspector, TreeExplainerFactory
from facet.selection import LearnerGrid, LearnerRanker, LearnerScores
from facet.selection import LearnerEvaluation, LearnerGrid, LearnerRanker
from facet.validation import BootstrapCV, StratifiedBootstrapCV
from sklearndf import TransformerDF
from sklearndf.pipeline import RegressorPipelineDF
@@ -152,9 +152,9 @@ def best_lgbm_crossfit(
) -> LearnerCrossfit[RegressorPipelineDF]:
# we get the best model_evaluation which is a LGBM - for the sake of test
# performance
best_lgbm_evaluation: LearnerScores[RegressorPipelineDF] = [
best_lgbm_evaluation: LearnerEvaluation[RegressorPipelineDF] = [
evaluation
for evaluation in regressor_ranker.ranking()
for evaluation in regressor_ranker.ranking
if isinstance(evaluation.pipeline.regressor, LGBMRegressorDF)
][0]

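
For downstream code, the migration mirrors these test changes: import LearnerEvaluation instead of LearnerScores, drop the parentheses on `ranking`, and treat scores as numpy arrays. A before/after sketch, reusing the `regressor_ranker` fixture name from the conftest above; the public `scores` attribute is assumed to mirror the LearnerEvaluation constructor argument.

    # Before this commit:
    #   from facet.selection import LearnerScores
    #   evaluations = regressor_ranker.ranking()      # method call, copied list
    #   best: LearnerScores = evaluations[0]
    #   mean_score = best.scores.mean()               # CrossfitScores.mean()

    # After this commit:
    from facet.selection import LearnerEvaluation

    evaluations = regressor_ranker.ranking            # property, not copied
    best: LearnerEvaluation = evaluations[0]          # ranked best-first
    mean_score = best.scores.mean()                   # plain np.ndarray.mean()
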
4 changes: 2 additions & 2 deletions test/test/facet/__init__.py
@@ -2,7 +2,7 @@

import pytest

from facet.selection import LearnerScores
from facet.selection import LearnerEvaluation
from sklearndf import TransformerDF
from sklearndf.transformation import (
ColumnTransformerDF,
@@ -38,7 +38,7 @@ def make_simple_transformer(


def check_ranking(
ranking: List[LearnerScores],
ranking: List[LearnerEvaluation],
expected_scores: Sequence[float],
expected_learners: Optional[Sequence[type]],
expected_parameters: Optional[Mapping[int, Mapping[str, Any]]],
2 changes: 1 addition & 1 deletion test/test/facet/test_crossfit.py
@@ -46,7 +46,7 @@ def test_prediction_classifier(
log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")

check_ranking(
ranking=model_ranker.ranking(),
ranking=model_ranker.ranking,
expected_scores=expected_learner_scores,
expected_learners=[RandomForestClassifierDF] * 4,
expected_parameters={
6 changes: 3 additions & 3 deletions test/test/facet/test_inspection.py
@@ -102,7 +102,7 @@ def test_model_inspection(
log.debug(f"\n{regressor_ranker.summary_report(max_learners=10)}")

check_ranking(
ranking=regressor_ranker.ranking(),
ranking=regressor_ranker.ranking,
expected_scores=expected_scores,
expected_learners=None,
expected_parameters=None,
@@ -169,7 +169,7 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None:

log.debug(f"\n{iris_classifier_ranker_binary.summary_report(max_learners=10)}")
check_ranking(
ranking=iris_classifier_ranker_binary.ranking(),
ranking=iris_classifier_ranker_binary.ranking,
expected_scores=expected_learner_scores,
expected_learners=[RandomForestClassifierDF] * 4,
expected_parameters={
@@ -227,7 +227,7 @@ def test_model_inspection_classifier_binary(
)


def test_model_inspection_classifier_binary_single_shap_output():
def test_model_inspection_classifier_binary_single_shap_output() -> None:
# simulate some data
x, y = make_classification(
n_samples=200, n_features=5, n_informative=5, n_redundant=0, random_state=42
