Commit

Make it easier to access cross-validation results for individual model calculations (#67)

* Eliminate class CrossfitScores; replace all uses with np.ndarray
* validate that only a single scoring function is passed to LearnerRanker
* rename LearnerScores to LearnerEvaluation
* don't copy the ranking list when returning it in property LearnerRanker.ranking
j-ittner authored Sep 22, 2020
1 parent 2547bae commit 37ada27
Showing 7 changed files with 52 additions and 93 deletions.
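
Taken together, the changes in this commit mean that a LearnerRanker now accepts only a single scoring function, exposes its results through the `ranking` property instead of a `ranking()` method, and reports cross-validation scores as plain 1d numpy arrays. A minimal usage sketch of the post-commit API follows; the pre-existing `sample` and `pipeline` objects and the keyword names for LearnerGrid and BootstrapCV are assumptions for illustration, while the LearnerRanker arguments, the `ranking` property, and LearnerEvaluation are taken from this diff.

    # Sketch only: assumes `sample` (a facet Sample) and `pipeline` (an unfitted
    # RegressorPipelineDF) already exist; keyword names for LearnerGrid and
    # BootstrapCV are assumptions, not shown in this diff.
    from facet.selection import LearnerGrid, LearnerRanker
    from facet.validation import BootstrapCV

    grid = LearnerGrid(
        pipeline=pipeline,
        learner_parameters={"n_estimators": [20, 50], "max_depth": [3, 5]},
    )

    ranker = LearnerRanker(
        grids=grid,
        cv=BootstrapCV(n_splits=20, random_state=42),
        scoring="r2",  # must now be a single scorer name or callable
        n_jobs=-3,
    )
    ranker.fit(sample=sample)

    # `ranking` is now a property (no parentheses) returning LearnerEvaluation
    # objects sorted best-first; the list is no longer a defensive copy.
    for evaluation in ranker.ranking:
        # .ranking_score and .pipeline appear in this diff; .scores is assumed
        # to mirror the LearnerEvaluation constructor argument
        print(evaluation.ranking_score, evaluation.scores.mean())
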
56 changes: 7 additions & 49 deletions src/facet/crossfit/_crossfit.py
@@ -27,7 +27,7 @@

log = logging.getLogger(__name__)

__all__ = ["CrossfitScores", "LearnerCrossfit", "Scorer"]
__all__ = ["LearnerCrossfit", "Scorer"]

#
# Type variables
@@ -70,46 +70,6 @@
#


class CrossfitScores:
""""
Distribution of scores across all cross-validation fits `(crossfits)` of a
learner pipeline.
Generated by method :meth:`.LearnerCrossfit.score`.
Scores for individual fits can be accessed by iteration, or by indexing
(``[…]`` notation).
Supports :func:`.len`, returning the number of fits in this crossfit.
:param scores: list or 1d array of scores for all crossfits of a pipeline
"""

def __init__(self, scores: Union[Sequence[float], np.ndarray]):
if isinstance(scores, list):
scores = np.array(scores)

if (
not isinstance(scores, np.ndarray)
or scores.dtype != float
or scores.ndim != 1
):
raise TypeError("arg scores must be a list or 1d numpy array of floats")

self._scores = np.array(scores)

def __getitem__(self, item: Union[int, slice]) -> Union[float, np.ndarray]:
return self._scores[item]

def mean(self) -> float:
""":return: the mean score"""
return self._scores.mean()

def std(self) -> float:
""":return: the standard deviation of the scores"""
return self._scores.std()


class _FitScoreParameters(NamedTuple):
pipeline: T_LearnerPipelineDF

@@ -219,7 +179,7 @@ def score(
self,
scoring: Union[str, Callable[[float, float], float], None] = None,
train_scores: bool = False,
) -> CrossfitScores:
) -> np.ndarray:
"""
Score all models in this crossfit using the given scoring function.
@@ -231,7 +191,7 @@ def score(
function as keyword argument ``sample_weight``
:param train_scores: if ``True``, calculate train scores instead of test \
scores (default: ``False``)
:return: the resulting scores
:return: the resulting scores as a 1d numpy array
"""

return self._fit_score(_scoring=scoring, _train_scores=train_scores)
@@ -242,7 +202,7 @@ def fit_score(
scoring: Union[str, Callable[[float, float], float], None] = None,
train_scores: bool = False,
**fit_params,
) -> CrossfitScores:
) -> np.ndarray:
"""
Fit then score this crossfit.
@@ -276,7 +236,7 @@ def _fit_score(
_train_scores: bool = False,
sample_weight: pd.Series = None,
**fit_params,
) -> Optional[CrossfitScores]:
) -> Optional[np.ndarray]:

if sample_weight is not None:
raise ValueError(
@@ -384,16 +344,14 @@ def _generate_parameters() -> Iterator[_FitScoreParameters]:
for parameters in _generate_parameters()
)

model_by_split, scores = (
list(items) for items in zip(*model_and_score_by_split)
)
model_by_split, scores = zip(*model_and_score_by_split)

if do_fit:
self._splits = splits
self._model_by_split = model_by_split
self._sample = _sample

return CrossfitScores(scores=scores) if do_score else None
return np.array(scores) if do_score else None

def resize(self: T, n_fits: int) -> T:
"""
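
With CrossfitScores removed, everything the wrapper offered (indexing, slicing, `len`, `mean`, and `std`) is native behaviour of the 1d numpy array that `score` and `fit_score` now return, so callers lose no functionality by dropping it. A runnable illustration with made-up scores:

    import numpy as np

    # Stand-in for what LearnerCrossfit.score() / .fit_score() now return:
    # one score per cross-validation fit (values invented for illustration).
    scores = np.array([0.81, 0.79, 0.84, 0.80, 0.82])

    print(len(scores))    # number of fits, formerly len(crossfit_scores)
    print(scores[0])      # score of an individual fit, formerly crossfit_scores[0]
    print(scores[1:3])    # slicing still works and yields an ndarray
    print(scores.mean())  # formerly CrossfitScores.mean()
    print(scores.std())   # formerly CrossfitScores.std()
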
57 changes: 29 additions & 28 deletions src/facet/selection/_selection.py
@@ -10,19 +10,20 @@
from types import MappingProxyType
from typing import *

import numpy as np
from numpy.random.mtrand import RandomState
from sklearn.model_selection import BaseCrossValidator

from facet import Sample
from facet.crossfit import CrossfitScores, LearnerCrossfit
from facet.crossfit import LearnerCrossfit
from pytools.api import AllTracker, inheritdoc, to_tuple
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF

log = logging.getLogger(__name__)

__all__ = ["LearnerGrid", "LearnerScores", "LearnerRanker"]
__all__ = ["LearnerGrid", "LearnerEvaluation", "LearnerRanker"]

#
# Type variables
@@ -163,7 +164,7 @@ def __len__(self) -> int:
)


class LearnerScores(Generic[T_LearnerPipelineDF]):
class LearnerEvaluation(Generic[T_LearnerPipelineDF]):
"""
A collection of scores for a specific parametrisation of a learner pipeline,
generated by a :class:`.LearnerRanker`.
@@ -175,15 +176,15 @@ def __init__(
self,
pipeline: T_LearnerPipelineDF,
parameters: Mapping[str, Any],
scores: CrossfitScores,
scores: np.ndarray,
ranking_score: float,
) -> None:
"""
:param pipeline: the unfitted learner pipeline
:param parameters: the hyper-parameters for which the learner pipeline was \
scored, as a mapping of parameter names to parameter values
:param scores: the scores of all crossfits of the learner pipeline
:param ranking_score: overall score determined by the ranking \
:param ranking_score: the aggregate score determined by the ranking \
metric of the :class:`.LearnerRanker`, used for ranking the learners
"""
super().__init__()
@@ -222,15 +223,8 @@ def __init__(
LearnerGrid[T_LearnerPipelineDF], Iterable[LearnerGrid[T_LearnerPipelineDF]]
],
cv: Optional[BaseCrossValidator],
scoring: Union[
str,
Callable[[float, float], float],
List[str],
Tuple[str],
Dict[str, Callable[[float, float], float]],
None,
] = None,
ranking_scorer: Callable[[CrossfitScores], float] = None,
scoring: Union[str, Callable[[float, float], float], None] = None,
ranking_scorer: Callable[[np.ndarray], float] = None,
shuffle_features: Optional[bool] = None,
random_state: Union[int, RandomState, None] = None,
n_jobs: Optional[int] = None,
@@ -243,8 +237,8 @@ def __init__(
(either a single grid, or an iterable of multiple grids)
:param cv: a cross validator (e.g., \
:class:`.BootstrapCV`)
:param scoring: a scorer to use when doing CV within GridSearch, defaults to \
``None``
:param scoring: a scoring function (by name or a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here)
:param ranking_scorer: a function to calculate a scalar score for every \
crossfit, taking a :class:`.CrossfitScores` and returning a float. \
The resulting score is used to rank all crossfits (highest score is best). \
@@ -262,6 +256,12 @@ def __init__(
verbose=verbose,
)

if scoring is not None and not (isinstance(scoring, str) or callable(scoring)):
raise TypeError(
"only a single scoring function is currently supported, "
f"but a {type(scoring).__name__} was given as arg scoring"
)

self.grids: Tuple[LearnerGrid, ...] = to_tuple(
grids, element_type=LearnerGrid, arg_name="grids"
)
Expand All @@ -276,14 +276,14 @@ def __init__(
self.random_state = random_state

# initialise state
self._ranking: Optional[List[LearnerScores]] = None
self._ranking: Optional[List[LearnerEvaluation]] = None
self._best_model: Optional[T_LearnerPipelineDF] = None

# add parameter documentation of ParallelizableMixin
__init__.__doc__ += ParallelizableMixin.__init__.__doc__

@staticmethod
def default_ranking_scorer(scores: CrossfitScores) -> float:
def default_ranking_scorer(scores: np.ndarray) -> float:
"""
The default function used to rank pipelines.
@@ -308,7 +308,7 @@ def fit(self: T, sample: Sample, **fit_params) -> T:
"""
self: LearnerRanker[T_LearnerPipelineDF] # support type hinting in PyCharm

ranking: List[LearnerScores[T_LearnerPipelineDF]] = self._rank_learners(
ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = self._rank_learners(
sample=sample, **fit_params
)
ranking.sort(key=lambda le: le.ranking_score, reverse=True)
@@ -325,13 +325,14 @@ def is_fitted(self) -> bool:
"""[see superclass]"""
return self._ranking is not None

def ranking(self) -> List[LearnerScores[T_LearnerPipelineDF]]:
@property
def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
"""
:return a ranking of all learners that were evaluated by this ranker,
in descending order of the ranking score.
A list of :class:`.LearnerEvaluation` for all learners evaluated by this ranker, \
in descending order of the ranking score.
"""
self._ensure_fitted()
return self._ranking.copy()
return self._ranking

@property
def best_model(self) -> T_LearnerPipelineDF:
@@ -362,7 +363,7 @@ def summary_report(self, max_learners: Optional[int] = None) -> str:

self._ensure_fitted()

def _model_name(evaluation: LearnerScores) -> str:
def _model_name(evaluation: LearnerEvaluation) -> str:
return type(evaluation.pipeline.final_estimator).__name__

def _parameters(params: Mapping[str, Iterable[Any]]) -> str:
@@ -392,7 +393,7 @@ def _parameters(params: Mapping[str, Iterable[Any]]) -> str:

def _rank_learners(
self, sample: Sample, **fit_params
) -> List[LearnerScores[T_LearnerPipelineDF]]:
) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
ranking_scorer = self.ranking_scorer

configurations: Iterable[Tuple[T_LearnerPipelineDF, Dict[str, Any]]] = (
@@ -406,7 +407,7 @@ def _rank_learners(
for parameters in grid
)

ranking: List[LearnerScores[T_LearnerPipelineDF]] = []
ranking: List[LearnerEvaluation[T_LearnerPipelineDF]] = []
best_score: float = -math.inf
best_crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None

@@ -422,14 +423,14 @@ def _rank_learners(
verbose=self.verbose,
)

pipeline_scoring: CrossfitScores = crossfit.fit_score(
pipeline_scoring: np.ndarray = crossfit.fit_score(
sample=sample, scoring=self.scoring, **fit_params
)

ranking_score = ranking_scorer(pipeline_scoring)

ranking.append(
LearnerScores(
LearnerEvaluation(
pipeline=pipeline,
parameters=parameters,
scores=pipeline_scoring,
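
Because `ranking_scorer` now takes a plain numpy array, a custom ranking function is simply any float-valued function of that array. The sketch below shows one plausible choice that rewards a high mean score while penalizing variance across fits; it is an illustration, not the body of `default_ranking_scorer`, which this diff does not show. The trailing comment spells out the effect of the new single-scorer validation.

    import numpy as np

    def conservative_ranking_scorer(scores: np.ndarray) -> float:
        """Illustrative ranking metric: mean score penalized by instability."""
        return float(scores.mean() - 2.0 * scores.std())

    print(conservative_ranking_scorer(np.array([0.81, 0.79, 0.84, 0.80, 0.82])))

    # Would be passed to the ranker in place of the default, e.g.:
    #   LearnerRanker(grids=..., cv=..., scoring="r2",
    #                 ranking_scorer=conservative_ranking_scorer)
    #
    # Note the new guard in __init__: a multi-metric argument such as
    #   scoring=["r2", "neg_mean_squared_error"]
    # now fails fast with
    #   TypeError: only a single scoring function is currently supported,
    #   but a list was given as arg scoring
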
6 changes: 3 additions & 3 deletions test/test/conftest.py
@@ -13,7 +13,7 @@
from facet import Sample
from facet.crossfit import LearnerCrossfit
from facet.inspection import LearnerInspector, TreeExplainerFactory
from facet.selection import LearnerGrid, LearnerRanker, LearnerScores
from facet.selection import LearnerEvaluation, LearnerGrid, LearnerRanker
from facet.validation import BootstrapCV, StratifiedBootstrapCV
from sklearndf import TransformerDF
from sklearndf.pipeline import RegressorPipelineDF
@@ -152,9 +152,9 @@ def best_lgbm_crossfit(
) -> LearnerCrossfit[RegressorPipelineDF]:
# we get the best model_evaluation which is a LGBM - for the sake of test
# performance
best_lgbm_evaluation: LearnerScores[RegressorPipelineDF] = [
best_lgbm_evaluation: LearnerEvaluation[RegressorPipelineDF] = [
evaluation
for evaluation in regressor_ranker.ranking()
for evaluation in regressor_ranker.ranking
if isinstance(evaluation.pipeline.regressor, LGBMRegressorDF)
][0]

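
For downstream code, the migration mirrors these test changes: import LearnerEvaluation instead of LearnerScores, drop the parentheses on `ranking`, and treat scores as numpy arrays. A before/after sketch, reusing the `regressor_ranker` fixture name from the conftest above; the public `scores` attribute is assumed to mirror the LearnerEvaluation constructor argument.

    # Before this commit:
    #   from facet.selection import LearnerScores
    #   evaluations = regressor_ranker.ranking()      # method call, copied list
    #   best: LearnerScores = evaluations[0]
    #   mean_score = best.scores.mean()               # CrossfitScores.mean()

    # After this commit:
    from facet.selection import LearnerEvaluation

    evaluations = regressor_ranker.ranking            # property, not copied
    best: LearnerEvaluation = evaluations[0]          # ranked best-first
    mean_score = best.scores.mean()                   # plain np.ndarray.mean()
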
4 changes: 2 additions & 2 deletions test/test/facet/__init__.py
@@ -2,7 +2,7 @@

import pytest

from facet.selection import LearnerScores
from facet.selection import LearnerEvaluation
from sklearndf import TransformerDF
from sklearndf.transformation import (
ColumnTransformerDF,
@@ -38,7 +38,7 @@ def make_simple_transformer(


def check_ranking(
ranking: List[LearnerScores],
ranking: List[LearnerEvaluation],
expected_scores: Sequence[float],
expected_learners: Optional[Sequence[type]],
expected_parameters: Optional[Mapping[int, Mapping[str, Any]]],
2 changes: 1 addition & 1 deletion test/test/facet/test_crossfit.py
@@ -46,7 +46,7 @@ def test_prediction_classifier(
log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")

check_ranking(
ranking=model_ranker.ranking(),
ranking=model_ranker.ranking,
expected_scores=expected_learner_scores,
expected_learners=[RandomForestClassifierDF] * 4,
expected_parameters={
6 changes: 3 additions & 3 deletions test/test/facet/test_inspection.py
@@ -102,7 +102,7 @@ def test_model_inspection(
log.debug(f"\n{regressor_ranker.summary_report(max_learners=10)}")

check_ranking(
ranking=regressor_ranker.ranking(),
ranking=regressor_ranker.ranking,
expected_scores=expected_scores,
expected_learners=None,
expected_parameters=None,
@@ -169,7 +169,7 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None:

log.debug(f"\n{iris_classifier_ranker_binary.summary_report(max_learners=10)}")
check_ranking(
ranking=iris_classifier_ranker_binary.ranking(),
ranking=iris_classifier_ranker_binary.ranking,
expected_scores=expected_learner_scores,
expected_learners=[RandomForestClassifierDF] * 4,
expected_parameters={
@@ -227,7 +227,7 @@ def test_model_inspection_classifier_binary(
)


def test_model_inspection_classifier_binary_single_shap_output():
def test_model_inspection_classifier_binary_single_shap_output() -> None:
# simulate some data
x, y = make_classification(
n_samples=200, n_features=5, n_informative=5, n_redundant=0, random_state=42
