generate LearnerRanker summary reports as data frames, not text #95

Merged: 9 commits, Oct 12, 2020
144 changes: 101 additions & 43 deletions src/facet/selection/_selection.py
@@ -19,19 +19,26 @@
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
cast,
)

import numpy as np
import pandas as pd
from numpy.random.mtrand import RandomState
from scipy.stats import sem
from sklearn.model_selection import BaseCrossValidator

from pytools.api import AllTracker, inheritdoc, to_tuple
from pytools.fit import FittableMixin
from pytools.parallelization import ParallelizableMixin
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
from sklearndf.pipeline import (
ClassifierPipelineDF,
LearnerPipelineDF,
RegressorPipelineDF,
)

from facet import Sample
from facet.crossfit import LearnerCrossfit
@@ -63,7 +70,7 @@
#


class LearnerGrid(Sequence[Dict[str, Any]], Generic[T_LearnerPipelineDF]):
class LearnerGrid(Generic[T_LearnerPipelineDF]):
"""
A grid of hyper-parameters for tuning a learner pipeline.

@@ -185,12 +192,13 @@ class LearnerEvaluation(Generic[T_LearnerPipelineDF]):
generated by a :class:`.LearnerRanker`.
"""

__slots__ = ["pipeline", "parameters", "scores", "ranking_score"]
__slots__ = ["pipeline", "parameters", "scoring_name", "scores", "ranking_score"]

def __init__(
self,
pipeline: T_LearnerPipelineDF,
parameters: Mapping[str, Any],
scoring_name: str,
scores: np.ndarray,
ranking_score: float,
) -> None:
@@ -210,6 +218,9 @@ def __init__(
#: the hyper-parameters for which the learner pipeline was scored
self.parameters = parameters

#: the name of the scoring function used to calculate the scores
self.scoring_name = scoring_name

#: the scores of all crossfits of the learner pipeline
self.scores = scores

@@ -251,13 +262,15 @@ def __init__(
(either a single grid, or an iterable of multiple grids)
:param cv: a cross validator (e.g., \
:class:`.BootstrapCV`)
:param scoring: a scoring function (by name or a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here)
:param scoring: a scoring function (by name, or as a callable) for evaluating \
learners (optional; use learner's default scorer if not specified here). \
If passing a callable, ``"score"`` will be used as the name of the \
scoring function, unless the callable defines a ``__name__`` attribute.
:param ranking_scorer: a function to calculate a scalar score for every \
crossfit, taking a :class:`.CrossfitScores` and returning a float. \
The resulting score is used to rank all crossfits (highest score is best). \
Defaults to :meth:`.default_ranking_scorer`, calculating \
`mean(scores) - 2 * std(scores)`.
`mean(scores) - 2 * std(scores, ddof=1)`.
:param shuffle_features: if ``True``, shuffle column order of features for \
every crossfit (default: ``False``)
:param random_state: optional random seed or random state for shuffling the \
@@ -276,9 +289,16 @@ def __init__(
f"but a {type(scoring).__name__} was given as arg scoring"
)

self.grids: Tuple[LearnerGrid, ...] = to_tuple(
grids_tuple: Tuple[LearnerGrid, ...] = to_tuple(
grids, element_type=LearnerGrid, arg_name="grids"
)
if len(grids_tuple) == 0:
raise ValueError("arg grids must specify at least one LearnerGrid")
learner_type = _learner_type(grids_tuple[0].pipeline)
if not all(isinstance(grid.pipeline, learner_type) for grid in grids_tuple[1:]):
raise ValueError("arg grids mixes regressor and classifier pipelines")

self.grids = grids_tuple
self.cv = cv
self.scoring = scoring
self.ranking_scorer = (
@@ -301,13 +321,13 @@ def default_ranking_scorer(scores: np.ndarray) -> float:
"""
The default function used to rank pipelines.

Calculates `mean(scores) - 2 * std(scores)`, i.e., ranks pipelines by a
Calculates `mean(scores) - 2 * std(scores, ddof=1)`, i.e., ranks pipelines by a
(pessimistic) lower bound of the expected score.

:param scores: the scores for all crossfits
:return: scalar score for ranking the pipeline
"""
return scores.mean() - 2 * scores.std()
return scores.mean() - 2 * scores.std(ddof=1)

def fit(self: T, sample: Sample, **fit_params) -> T:
"""
@@ -339,12 +359,35 @@ def is_fitted(self) -> bool:
"""[see superclass]"""
return self._ranking is not None

@property
def scoring_name(self) -> str:
"""
The name of the scoring function used to rank the learners.
"""
scoring = self.scoring
if isinstance(scoring, str):
return scoring
elif callable(scoring):
try:
return scoring.__name__
except AttributeError:
return "score"
else:
learner_type = _learner_type(self.grids[0].pipeline)
if learner_type is RegressorPipelineDF:
return "r2_score"
elif learner_type is ClassifierPipelineDF:
return "accuracy_score"
else:
# default case - we should not end up here, but handle it for forward
# compatibility
return "score"

@property
def ranking(self) -> List[LearnerEvaluation[T_LearnerPipelineDF]]:
"""
A list of :class:`.LearnerEvaluation` for all learners evaluated
by this ranker, \
in descending order of the ranking score.
by this ranker, in descending order of the ranking score.
"""
self._ensure_fitted()
return self._ranking
@@ -365,46 +408,42 @@ def best_model_crossfit(self) -> LearnerCrossfit[T_LearnerPipelineDF]:
self._ensure_fitted()
return self._best_crossfit

def summary_report(self, max_learners: Optional[int] = None) -> str:
def summary_report(self) -> pd.DataFrame:
"""
A human-readable report of the learner evaluations, sorted by ranking score in
descending order.
Create a summary table of the scores achieved by all learners in the grid
search, sorted by ranking score in descending order.

:param max_learners: maximum number of learners to include in the report \
(optional)

:return: a multi-line string with a summary of the pipeline ranking
:return: the summary report of the grid search as a data frame
"""

self._ensure_fitted()

def _model_name(evaluation: LearnerEvaluation) -> str:
return type(evaluation.pipeline.final_estimator).__name__

def _parameters(params: Mapping[str, Iterable[Any]]) -> str:
return ",".join(
[
f"{param_name}={param_value}"
for param_name, param_value in params.items()
]
)

ranking = self._ranking[:max_learners] if max_learners else self._ranking

name_width = max(len(_model_name(ranked_model)) for ranked_model in ranking)
scoring_name = self.scoring_name
scores_mean_name = f"{scoring_name}_mean"
scores_std_name = f"{scoring_name}_std"
scores_sem_name = f"{scoring_name}_sem"

return "\n".join(
return pd.DataFrame.from_records(
[
f"Rank {rank + 1:2d}: "
f"{_model_name(evaluation):>{name_width}s}, "
f"ranking_score={evaluation.ranking_score:9.3g}, "
f"scores_mean={evaluation.scores.mean():9.3g}, "
f"scores_std={evaluation.scores.std():9.3g}, "
f"parameters={{{_parameters(evaluation.parameters)}}}"
"\n"
for rank, evaluation in enumerate(ranking)
]
)
dict(
type=type(evaluation.pipeline.final_estimator).__name__,
ranking_score=evaluation.ranking_score,
**{
scores_mean_name: evaluation.scores.mean(),
scores_std_name: evaluation.scores.std(ddof=1),
scores_sem_name: sem(evaluation.scores, ddof=1),
**evaluation.parameters,
},
)
for evaluation in (
sorted(
self._ranking,
key=lambda evaluation: evaluation.ranking_score,
reverse=True,
)
)
],
).rename_axis(index="rank")

def _rank_learners(
self, sample: Sample, **fit_params
@@ -426,6 +465,8 @@ def _rank_learners(
best_score: float = -math.inf
best_crossfit: Optional[LearnerCrossfit[T_LearnerPipelineDF]] = None

scoring_name = self.scoring_name

for pipeline, parameters in configurations:
crossfit = LearnerCrossfit(
pipeline=pipeline,
@@ -448,6 +489,7 @@
LearnerEvaluation(
pipeline=pipeline,
parameters=parameters,
scoring_name=scoring_name,
scores=pipeline_scoring,
ranking_score=ranking_score,
)
@@ -461,4 +503,20 @@
return ranking


def _learner_type(
pipeline: T_LearnerPipelineDF,
) -> Type[Union[RegressorPipelineDF, ClassifierPipelineDF]]:
# determine whether a learner pipeline fits a regressor or a classifier
for learner_type in [RegressorPipelineDF, ClassifierPipelineDF]:
if isinstance(pipeline, learner_type):
return learner_type
if isinstance(pipeline, LearnerPipelineDF):
raise TypeError(f"unknown learner pipeline type: {type(learner_type).__name__}")
else:
raise TypeError(
"attribute grid.pipeline is not a learner pipeline: "
f"{type(learner_type).__name__}"
)


__tracker.validate()
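
For illustration, here is a minimal, self-contained sketch of what the new summary_report output looks like, mirroring the logic above with plain numpy, pandas and scipy. It is not part of this PR; the scores, learner type, scoring name ("r2") and hyper-parameter values below are invented for illustration only.

import numpy as np
import pandas as pd
from scipy.stats import sem

# hypothetical cross-validation scores of a single evaluated pipeline
scores = np.array([0.81, 0.79, 0.84, 0.80, 0.78])

# default ranking score, as implemented above: mean(scores) - 2 * std(scores, ddof=1)
ranking_score = scores.mean() - 2 * scores.std(ddof=1)

# summary_report builds one record per evaluation; score columns are prefixed
# with the scoring function's name, and each hyper-parameter becomes a column
scoring_name = "r2"
record = dict(
    type="RandomForestRegressorDF",
    ranking_score=ranking_score,
    **{
        f"{scoring_name}_mean": scores.mean(),
        f"{scoring_name}_std": scores.std(ddof=1),
        f"{scoring_name}_sem": sem(scores, ddof=1),
    },
    min_samples_leaf=16,
    n_estimators=50,
)

report = pd.DataFrame.from_records([record]).rename_axis(index="rank")
print(report)

Returning a data frame instead of pre-formatted text lets callers sort, filter, round, and export the ranking with standard pandas tooling, which is the motivation for this change.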
3 changes: 3 additions & 0 deletions test/test/conftest.py
@@ -40,6 +40,9 @@
# disable SHAP debugging messages
logging.getLogger("shap").setLevel(logging.WARNING)

# configure pandas text output
pd.set_option("display.width", None) # get display width from terminal
pd.set_option("precision", 3) # 3 digits precision for easier readability

K_FOLDS = 5
N_BOOTSTRAPS = 30
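A side note on the pandas options added above: the same settings can be spelled with fully qualified option names, which avoid relying on the short "precision" alias. A minimal sketch, assuming only pandas itself:

import pandas as pd

# same settings as in conftest.py, with fully qualified option names
pd.set_option("display.width", None)    # auto-detect terminal width
pd.set_option("display.precision", 3)   # print floats with 3 digits of precision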
26 changes: 23 additions & 3 deletions test/test/facet/test_crossfit.py
@@ -3,7 +3,8 @@
import pytest

from sklearndf.classification import RandomForestClassifierDF
from sklearndf.pipeline import ClassifierPipelineDF
from sklearndf.pipeline import ClassifierPipelineDF, RegressorPipelineDF
from sklearndf.regression import RandomForestRegressorDF

from . import check_ranking
from facet import Sample
@@ -22,11 +23,30 @@ def test_prediction_classifier(
# define parameters and crossfit
grids = LearnerGrid(
pipeline=ClassifierPipelineDF(
classifier=RandomForestClassifierDF(random_state=42), preprocessing=None
classifier=RandomForestClassifierDF(random_state=42)
),
learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]},
)

# define an illegal grid list, mixing classification with regression
grids_illegal = [
grids,
LearnerGrid(
pipeline=RegressorPipelineDF(
regressor=RandomForestRegressorDF(random_state=42)
),
learner_parameters={"min_samples_leaf": [16, 32], "n_estimators": [50, 80]},
),
]

with pytest.raises(
ValueError, match="^arg grids mixes regressor and classifier pipelines$"
):
LearnerRanker(
grids=grids_illegal,
cv=cv_stratified_bootstrap,
)

model_ranker: LearnerRanker[
ClassifierPipelineDF[RandomForestClassifierDF]
] = LearnerRanker(
Expand All @@ -44,7 +64,7 @@ def test_prediction_classifier(
):
model_ranker.fit(sample=iris_sample, sample_weight=iris_sample.weight)

log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{model_ranker.summary_report()}")

check_ranking(
ranking=model_ranker.ranking,
4 changes: 2 additions & 2 deletions test/test/facet/test_inspection.py
@@ -100,7 +100,7 @@ def test_model_inspection(
-0.074,
]

log.debug(f"\n{regressor_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{regressor_ranker.summary_report()}")

check_ranking(
ranking=regressor_ranker.ranking,
@@ -168,7 +168,7 @@ def test_binary_classifier_ranking(iris_classifier_ranker_binary) -> None:

expected_learner_scores = [0.872, 0.868, 0.866, 0.859]

log.debug(f"\n{iris_classifier_ranker_binary.summary_report(max_learners=10)}")
log.debug(f"\n{iris_classifier_ranker_binary.summary_report()}")
check_ranking(
ranking=iris_classifier_ranker_binary.ranking,
expected_scores=expected_learner_scores,
4 changes: 2 additions & 2 deletions test/test/facet/test_selection.py
@@ -107,7 +107,7 @@ def test_model_ranker(
grids=regressor_grids, cv=cv, scoring="r2", n_jobs=n_jobs
).fit(sample=sample)

log.debug(f"\n{ranker.summary_report(max_learners=10)}")
log.debug(f"\n{ranker.summary_report()}")

assert isinstance(ranker.best_model_crossfit, LearnerCrossfit)

@@ -168,7 +168,7 @@ def test_model_ranker_no_preprocessing(n_jobs) -> None:
grids=models, cv=cv, n_jobs=n_jobs
).fit(sample=test_sample)

log.debug(f"\n{model_ranker.summary_report(max_learners=10)}")
log.debug(f"\n{model_ranker.summary_report()}")

check_ranking(
ranking=model_ranker.ranking,