diff --git a/test/test/conftest.py b/test/test/conftest.py index 57f38391..a811663c 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd import pytest -from numpy.testing import assert_array_almost_equal, assert_array_equal +from numpy.testing import assert_allclose, assert_array_equal from sklearn import datasets from sklearn.model_selection import BaseCrossValidator, GridSearchCV, KFold from sklearn.utils import Bunch @@ -71,7 +71,7 @@ def n_jobs() -> int: @pytest.fixture def cv_kfold() -> KFold: # define a CV - return KFold(n_splits=K_FOLDS) + return KFold(n_splits=K_FOLDS, shuffle=True, random_state=42) @pytest.fixture @@ -357,14 +357,14 @@ def check_ranking( only required for multi estimator search """ - col_score = COL_SCORE # + ("-",) * (ranking.columns.nlevels - len(COL_SCORE)) - scores_actual: pd.Series = ranking.loc[:, col_score].values[: len(scores_expected)] - assert_array_almost_equal( + scores_actual: pd.Series = ranking.loc[:, COL_SCORE].values[: len(scores_expected)] + + assert_allclose( scores_actual, scores_expected, - decimal=3, + rtol=0.01, err_msg=( - f"unexpected scores: " f"got {scores_actual} but expected {scores_expected}" + f"unexpected scores: got {scores_actual} but expected {scores_expected}" ), ) diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index 76f0f795..69fe8fd5 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -3,7 +3,7 @@ """ import logging import warnings -from typing import List, Optional, TypeVar, Union +from typing import List, Optional, Set, TypeVar, Union import numpy as np import pandas as pd @@ -11,10 +11,9 @@ from numpy.testing import assert_allclose from pandas.testing import assert_frame_equal, assert_series_equal from sklearn.datasets import make_classification -from sklearn.model_selection import GridSearchCV, KFold +from sklearn.model_selection import GridSearchCV from pytools.viz.dendrogram import DendrogramDrawer, DendrogramReportStyle -from sklearndf import TransformerDF from sklearndf.classification import ( GradientBoostingClassifierDF, RandomForestClassifierDF, @@ -37,31 +36,26 @@ T = TypeVar("T") -def test_model_inspection( - regressor_selector, - best_lgbm_model: RegressorPipelineDF, - preprocessed_feature_names, - regressor_inspector: LearnerInspector, - cv_kfold: KFold, - sample: Sample, - simple_preprocessor: TransformerDF, - n_jobs: int, -) -> None: - - ranking = regressor_selector.summary_report() - - # define checksums for this test - log.debug(f"\n{ranking}") - +def test_regressor_selector( + regressor_selector: ModelSelector[RegressorPipelineDF, GridSearchCV] +): check_ranking( - ranking=ranking, + ranking=regressor_selector.summary_report(), is_classifier=False, scores_expected=( - [0.693, 0.689, 0.677, 0.661, 0.615, 0.615, 0.367, 0.281, 0.281, 0.281] + [0.820, 0.818, 0.808, 0.806, 0.797, 0.797, 0.652, 0.651, 0.651, 0.651] ), params_expected=None, ) + +def test_model_inspection( + best_lgbm_model: RegressorPipelineDF, + preprocessed_feature_names: Set[str], + regressor_inspector: LearnerInspector, + sample: Sample, + n_jobs: int, +) -> None: shap_values: pd.DataFrame = regressor_inspector.shap_values() # the length of rows in shap_values should be equal to the unique observation