diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 6be51537..20652e52 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,6 +1,26 @@ Release Notes ============= +FACET 2.0 +--------- + +2.0.0 +~~~~~ + +- API: return :class:`.LearnerInspector` matrix outputs as :class:`.Matrix` instances +- API: diagonals of feature synergy, redundancy, and association matrices are now + ``nan`` instead of 1.0 +- API: the leaf order of :class:`.LinkageTree` objects generated by + ``feature_…_linkage`` methods of :class:`.LearnerInspector` is now the same as the + row and column order of :class:`.Matrix` objects returned by the corresponding + ``feature_…_matrix`` methods of :class:`.LearnerInspector`, minimizing the distance + between adjacent leaves. + The old sorting behaviour of FACET 1.x can be restored using method + :meth:`.LinkageTree.sort_by_weight` +- VIZ: minor tweaks to simulation plots and reports generated by + :class:`.SimulationDrawer` + + FACET 1.2 --------- @@ -22,7 +42,7 @@ the baseline of a simulation. ~~~~~ - BUILD: added support for *sklearndf* 1.2 and *scikit-learn* 0.24 -- API: new optional parameter `subsample` in method +- API: new optional parameter ``subsample`` in method :meth:`.BaseUnivariateSimulator.simulate_feature` can be used to specify a subsample to be used in the simulation (but simulating using a crossfit based on the full sample) @@ -45,7 +65,7 @@ by the :class:`.LearnerInspector`. ~~~~~ - API: SHAP interaction vectors can (in part) also be influenced by redundancy among - features. This can inflate quantificatios of synergy, especially in cases where two + features. This can inflate quantifications of synergy, especially in cases where two variables are highly redundant. FACET now corrects interaction vectors for redundancy prior to calculating synergy. Technically we ensure that each interaction vector is orthogonal w.r.t the main effect vectors of both associated features. 
@@ -66,7 +86,8 @@ FACET 1.0 1.0.3 ~~~~~ -- FIX: restrict package requirements to *gamma-pytools* 1.0.* and *sklearndf* 1.0.x, since FACET 1.0 is not compatible with *gamma-pytools* 1.1.* +- FIX: restrict package requirements to *gamma-pytools* 1.0.* and *sklearndf* 1.0.x, + since FACET 1.0 is not compatible with *gamma-pytools* 1.1.* 1.0.2 ~~~~~ diff --git a/sphinx/source/tutorial/Classification_with_Facet.ipynb b/sphinx/source/tutorial/Classification_with_Facet.ipynb index 31cdbdff..28887433 100644 --- a/sphinx/source/tutorial/Classification_with_Facet.ipynb +++ b/sphinx/source/tutorial/Classification_with_Facet.ipynb @@ -203,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pytools.viz.dendrogram import DendrogramDrawer, LinkageTree\n", + "from pytools.viz.dendrogram import DendrogramDrawer\n", "from pytools.viz.matrix import MatrixDrawer" ] }, @@ -3152,10 +3152,10 @@ "evalue": "name 'TableOne' is not defined", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 18\u001b[0m ]\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m mytable = TableOne(\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mprediab_eda\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprediab_eda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Pre_diab\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'TableOne' is not defined" + 
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[1;32m 18\u001B[0m ]\n\u001B[1;32m 19\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 20\u001B[0;31m mytable = TableOne(\n\u001B[0m\u001B[1;32m 21\u001B[0m \u001B[0mprediab_eda\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 22\u001B[0m \u001B[0mcolumns\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mprediab_eda\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcolumns\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdrop\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Pre_diab\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mto_list\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mNameError\u001B[0m: name 'TableOne' is not defined" ] } ], @@ -3294,4 +3294,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/src/facet/inspection/_inspection.py b/src/facet/inspection/_inspection.py index d80f67d1..81ba2588 100644 --- a/src/facet/inspection/_inspection.py +++ b/src/facet/inspection/_inspection.py @@ -7,13 +7,13 @@ import numpy as np import pandas as pd -from scipy.cluster.hierarchy import leaves_list, linkage, optimal_leaf_ordering -from scipy.spatial.distance import squareform +from scipy.cluster import hierarchy +from scipy.spatial import distance from pytools.api import AllTracker, inheritdoc +from pytools.data import LinkageTree, Matrix from pytools.fit import FittableMixin from pytools.parallelization import ParallelizableMixin -from pytools.viz.dendrogram import LinkageTree from sklearndf import ClassifierDF, LearnerDF, RegressorDF from sklearndf.pipeline import LearnerPipelineDF @@ -491,7 +491,7 @@ def feature_synergy_matrix( symmetrical: bool = False, 
aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature synergy matrix. @@ -540,6 +540,7 @@ def feature_synergy_matrix( affinity_symmetrical=explainer.synergy( symmetrical=True, absolute=False, std=False ), + affinity_metric="synergy", clustered=clustered, ) @@ -550,7 +551,7 @@ def feature_redundancy_matrix( symmetrical: bool = False, aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature redundancy matrix. @@ -599,6 +600,7 @@ def feature_redundancy_matrix( affinity_symmetrical=explainer.redundancy( symmetrical=True, absolute=False, std=False ), + affinity_metric="redundancy", clustered=clustered, ) @@ -609,7 +611,7 @@ def feature_association_matrix( symmetrical: bool = False, aggregation: Optional[str] = AGG_MEAN, clustered: bool = True, - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ) -> Union[Matrix, List[Matrix]]: """ Calculate the feature association matrix. @@ -662,6 +664,7 @@ def feature_association_matrix( affinity_symmetrical=global_explainer.association( symmetrical=True, absolute=False, std=False ), + affinity_metric="association", clustered=clustered, ) @@ -725,7 +728,7 @@ def feature_association_linkage(self) -> Union[LinkageTree, List[LinkageTree]]: ) ) - def feature_interaction_matrix(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: + def feature_interaction_matrix(self) -> Union[Matrix, List[Matrix]]: """ Calculate relative shap interaction values for all feature pairings. 
@@ -825,7 +828,9 @@ def feature_interaction_matrix(self) -> Union[pd.DataFrame, List[pd.DataFrame]]: )[np.newaxis, :, :] # create a data frame from the feature matrix - return self.__feature_matrix_to_df(interaction_matrix) + return self.__arrays_to_matrix( + interaction_matrix, value_label="relative shap interaction" + ) def shap_plot_data(self) -> ShapPlotData: """ @@ -882,9 +887,9 @@ def shap_plot_data(self) -> ShapPlotData: sample=sample, ) - def __feature_matrix_to_df( - self, matrix: np.ndarray - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + def __arrays_to_matrix( + self, matrix: np.ndarray, value_label: str + ) -> Union[Matrix, List[Matrix]]: # transform a matrix of shape (n_outputs, n_features, n_features) # to a data frame @@ -897,27 +902,38 @@ def __feature_matrix_to_df( # convert array to data frame(s) with features as row and column indices if len(matrix) == 1: - return pd.DataFrame( - data=matrix[0], index=feature_index, columns=feature_index + return self.__array_to_matrix( + matrix[0], + feature_importance=self.feature_importance(), + value_label=value_label, ) else: return [ - pd.DataFrame(data=m, index=feature_index, columns=feature_index) - for m in matrix + self.__array_to_matrix( + m, + feature_importance=feature_importance, + value_label=f"{value_label} ({output_name})", + ) + for m, (_, feature_importance), output_name in zip( + matrix, self.feature_importance().items(), self.output_names_ + ) ] - @staticmethod def __feature_affinity_matrix( + self, affinity_matrices: List[pd.DataFrame], affinity_symmetrical: np.ndarray, + affinity_metric: str, clustered: bool, - ): + ) -> Matrix: if clustered: - affinity_matrices = LearnerInspector.__sort_affinity_matrices( + affinity_matrices = self.__sort_affinity_matrices( affinity_matrices=affinity_matrices, symmetrical_affinity_matrices=affinity_symmetrical, ) - return LearnerInspector.__isolate_single_frame(affinity_matrices) + return self.__isolate_single_frame( + affinity_matrices, 
affinity_metric=affinity_metric + ) @staticmethod def __sort_affinity_matrices( @@ -928,22 +944,14 @@ def __sort_affinity_matrices( fn_linkage = LearnerInspector.__linkage_matrix_from_affinity_matrix_for_output return [ - affinity_matrix.iloc[feature_order, feature_order] + (lambda feature_order: affinity_matrix.iloc[feature_order, feature_order])( + feature_order=hierarchy.leaves_list( + Z=fn_linkage(feature_affinity_matrix=symmetrical_affinity_matrix) + ) + ) for affinity_matrix, symmetrical_affinity_matrix in zip( affinity_matrices, symmetrical_affinity_matrices ) - for feature_order in ( - leaves_list( - Z=optimal_leaf_ordering( - Z=fn_linkage( - feature_affinity_matrix=symmetrical_affinity_matrix - ), - y=symmetrical_affinity_matrix, - ) - ) - # reverse the index list so larger values tend to end up on top - [::-1], - ) ] @staticmethod @@ -989,7 +997,8 @@ def __linkages_from_affinity_matrices( return [ self.__linkage_tree_from_affinity_matrix_for_output( - feature_affinity_for_output, feature_importance_for_output + feature_affinity_for_output, + feature_importance_for_output, ) for feature_affinity_for_output, ( _, @@ -1037,10 +1046,20 @@ def __linkage_matrix_from_affinity_matrix_for_output( # (1 = closest, 0 = most distant) # compress the distance matrix (required by SciPy) - compressed_distance_vector = squareform(1 - abs(feature_affinity_matrix)) + distance_matrix = 1.0 - abs(feature_affinity_matrix) + np.fill_diagonal(distance_matrix, 0.0) + compressed_distance_matrix: np.ndarray = distance.squareform(distance_matrix) # calculate the linkage matrix - return linkage(y=compressed_distance_vector, method="single") + leaf_ordering: np.ndarray = hierarchy.optimal_leaf_ordering( + Z=hierarchy.linkage(y=compressed_distance_matrix, method="single"), + y=compressed_distance_matrix, + ) + + # reverse the leaf ordering, so that larger values tend to end up on top + leaf_ordering[:, [1, 0]] = leaf_ordering[:, [0, 1]] + + return leaf_ordering def 
_ensure_shap_interaction(self) -> None: if not self._shap_interaction: @@ -1050,14 +1069,69 @@ def _ensure_shap_interaction(self) -> None: "enable calculations involving SHAP interaction values." ) - @staticmethod def __isolate_single_frame( + self, frames: List[pd.DataFrame], - ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + affinity_metric: str, + ) -> Union[Matrix, List[Matrix]]: + feature_importance = self.feature_importance() + if len(frames) == 1: - return frames[0] + assert isinstance(feature_importance, pd.Series) + return self.__frame_to_matrix( + frames[0], + affinity_metric=affinity_metric, + feature_importance=feature_importance, + ) else: - return frames + return [ + self.__frame_to_matrix( + frame, + affinity_metric=affinity_metric, + feature_importance=frame_importance, + feature_importance_category=str(frame_name), + ) + for frame, (frame_name, frame_importance) in zip( + frames, feature_importance.items() + ) + ] + + @staticmethod + def __array_to_matrix( + a: np.ndarray, + *, + feature_importance: pd.Series, + value_label: str, + ) -> Matrix: + return Matrix( + a, + names=(feature_importance.index, feature_importance.index), + weights=(feature_importance, feature_importance), + value_label=value_label, + name_labels=("feature", "feature"), + ) + + @staticmethod + def __frame_to_matrix( + frame: pd.DataFrame, + *, + affinity_metric: str, + feature_importance: pd.Series, + feature_importance_category: Optional[str] = None, + ) -> Matrix: + return Matrix.from_frame( + frame, + weights=( + feature_importance.reindex(frame.index), + feature_importance.reindex(frame.columns), + ), + value_label=( + f"{affinity_metric} ({feature_importance_category})" + if feature_importance_category + else affinity_metric + ), + name_labels=("primary feature", "associated feature"), + ) @staticmethod def __validate_aggregation_method(aggregation: str) -> None: diff --git a/src/facet/inspection/_shap_global_explanation.py 
b/src/facet/inspection/_shap_global_explanation.py index 50acf41b..ee6903e3 100644 --- a/src/facet/inspection/_shap_global_explanation.py +++ b/src/facet/inspection/_shap_global_explanation.py @@ -119,8 +119,8 @@ def from_relative_affinity( where=affinity_abs_sym_ij_2x > 0.0, ) - # re-set the diagonal to 1.0 in case of rounding errors - fill_diagonal(affinity_rel_sym_ij, 1.0) + # affinity of a feature with itself is undefined + fill_diagonal(affinity_rel_sym_ij, np.nan) # return the AffinityMatrices object return AffinityMatrix( diff --git a/src/facet/inspection/_shap_projection.py b/src/facet/inspection/_shap_projection.py index 0f791868..beb01708 100644 --- a/src/facet/inspection/_shap_projection.py +++ b/src/facet/inspection/_shap_projection.py @@ -113,8 +113,8 @@ def _calculate_association(context: ShapContext) -> AffinityMatrix: # calculate association as the coefficient of determination for p[i] and p[j] ass_ij = cov_p_i_p_j_over_var_p_i * transpose(cov_p_i_p_j_over_var_p_i) - # we define the association of a feature with itself as 1 - fill_diagonal(ass_ij, 1.0) + # association of a feature with itself is undefined + fill_diagonal(ass_ij, np.nan) return AffinityMatrix.from_relative_affinity( affinity_rel_ij=ass_ij, std_p_i=sqrt(var_p_i) @@ -253,8 +253,8 @@ def _calculate_synergy_redundancy( # this is the coefficient of determination of the interaction vector syn_ij = cov_p_i_p_ij_over_var_p_i * cov_p_i_p_ij_over_var_p_ij - # we define the synergy of a feature with itself as 1 - fill_diagonal(syn_ij, 1.0) + # synergy of a feature with itself is undefined + fill_diagonal(syn_ij, np.nan) # # Redundancy: red[i, j] @@ -291,8 +291,8 @@ def _calculate_synergy_redundancy( # scale to accommodate variance already explained by synergy red_ij *= 1 - syn_ij - # we define the redundancy of a feature with itself as 1 - fill_diagonal(red_ij, 1.0) + # redundancy of a feature with itself is undefined + fill_diagonal(red_ij, np.nan) # # SHAP decomposition as relative 
contributions of diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py index d1f92912..048a3617 100644 --- a/src/facet/simulation/viz/_style.py +++ b/src/facet/simulation/viz/_style.py @@ -144,15 +144,18 @@ def draw_uplift( x = range(len(partitions)) else: x = partitions + + # get axes and color scheme ax = self.ax + colors = self.colors # plot the confidence bounds and the median - (line_min,) = ax.plot(x, outputs_lower_bound, color=self.colors.accent_3) - (line_median,) = ax.plot(x, outputs_median, color=self.colors.accent_2) - (line_max,) = ax.plot(x, outputs_upper_bound, color=self.colors.accent_3) + (line_min,) = ax.plot(x, outputs_lower_bound, color=colors.accent_3) + (line_median,) = ax.plot(x, outputs_median, color=colors.accent_2) + (line_max,) = ax.plot(x, outputs_upper_bound, color=colors.accent_3) # add a horizontal line at the baseline - line_base = ax.axhline(y=baseline, linewidth=0.5, color=self.colors.accent_1) + line_base = ax.axhline(y=baseline, linewidth=0.5, color=colors.accent_1) # add a legend labels = self._legend(confidence_level=confidence_level) @@ -160,7 +163,7 @@ def draw_uplift( ax.legend(handles, labels) # label the y axis - ax.set_ylabel(output_unit) + ax.set_ylabel(output_unit, color=colors.foreground) # format and label the x axis ax.tick_params( @@ -373,12 +376,6 @@ def draw_histogram( ) ) - def finalize_drawing(self, **kwargs: Any) -> None: - """[see superclass]""" - super().finalize_drawing(**kwargs) - # print two trailing line breaks - self.out.write("\n") - @staticmethod def _partition_format(is_categorical: bool) -> str: if is_categorical: diff --git a/test/test/facet/test_inspection.py b/test/test/facet/test_inspection.py index bcfa99b3..41d09613 100644 --- a/test/test/facet/test_inspection.py +++ b/test/test/facet/test_inspection.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pytest +from numpy.testing import assert_allclose from pandas.testing import assert_frame_equal, 
assert_series_equal from sklearn.datasets import make_classification from sklearn.model_selection import KFold @@ -175,16 +176,17 @@ def test_model_inspection_classifier_binary( association_matrix = model_inspector.feature_association_matrix( clustered=True, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.692, 0.195, 0.052], - [0.692, 1.000, 0.290, 0.041], - [0.195, 0.290, 1.000, 0.081], - [0.052, 0.041, 0.081, 1.000], + [np.nan, 0.692, 0.195, 0.052], + [0.692, np.nan, 0.290, 0.041], + [0.195, 0.290, np.nan, 0.081], + [0.052, 0.041, 0.081, np.nan], ] ), - abs=0.02, + atol=0.02, ) except AssertionError as error: print_expected_matrix(error=error) @@ -260,7 +262,8 @@ def test_model_inspection_classifier_multi_class( assert feature_importance.columns.equals( pd.Index(iris_inspector_multi_class.output_names_, name="class") ) - assert feature_importance.values == pytest.approx( + assert_allclose( + feature_importance.values, np.array( [ [0.125, 0.085, 0.104], @@ -269,7 +272,7 @@ def test_model_inspection_classifier_multi_class( [0.432, 0.441, 0.425], ] ), - abs=0.02, + atol=0.02, ) # Shap decomposition matrices (feature dependencies) @@ -279,62 +282,61 @@ def test_model_inspection_classifier_multi_class( clustered=False ) - assert np.hstack([m.values for m in synergy_matrix]) == pytest.approx( + assert_allclose( + np.hstack([m.values for m in synergy_matrix]), np.array( [ - [1.000, 0.009, 0.057, 0.055, 1.000, 0.042] - + [0.418, 0.418, 1.000, 0.004, 0.085, 0.097], - [0.101, 1.000, 0.052, 0.072, 0.094, 1.000] - + [0.117, 0.156, 0.090, 1.000, 0.237, 0.258], - [0.003, 0.001, 1.000, 0.002, 0.027, 0.005] - + [1.000, 0.041, 0.012, 0.004, 1.000, 0.031], - [0.002, 0.000, 0.001, 1.000, 0.029, 0.005] - + [0.043, 1.000, 0.015, 0.005, 0.036, 1.000], + [np.nan, 0.009, 0.057, 0.055, np.nan, 0.042] + + [0.418, 0.418, np.nan, 0.004, 0.085, 0.097], + [0.101, np.nan, 0.052, 0.072, 0.094, 
np.nan] + + [0.117, 0.156, 0.090, np.nan, 0.237, 0.258], + [0.003, 0.001, np.nan, 0.002, 0.027, 0.005] + + [np.nan, 0.041, 0.012, 0.004, np.nan, 0.031], + [0.002, 0.000, 0.001, np.nan, 0.029, 0.005] + + [0.043, np.nan, 0.015, 0.005, 0.036, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = iris_inspector_multi_class.feature_redundancy_matrix( clustered=False ) - assert np.hstack([m.values for m in redundancy_matrix]) == ( - pytest.approx( - np.array( - [ - [1.000, 0.087, 0.643, 0.656, 1.000, 0.065] - + [0.265, 0.234, 1.000, 0.034, 0.594, 0.505], - [0.082, 1.000, 0.297, 0.292, 0.064, 1.000] - + [0.117, 0.171, 0.031, 1.000, 0.024, 0.021], - [0.682, 0.314, 1.000, 0.996, 0.471, 0.130] - + [1.000, 0.743, 0.642, 0.031, 1.000, 0.761], - [0.695, 0.315, 0.997, 1.000, 0.406, 0.194] - + [0.741, 1.000, 0.550, 0.028, 0.756, 1.000], - ] - ), - abs=0.02, - ) + assert_allclose( + np.hstack([m.values for m in redundancy_matrix]), + np.array( + [ + [np.nan, 0.087, 0.643, 0.656, np.nan, 0.065] + + [0.265, 0.234, np.nan, 0.034, 0.594, 0.505], + [0.082, np.nan, 0.297, 0.292, 0.064, np.nan] + + [0.117, 0.171, 0.031, np.nan, 0.024, 0.021], + [0.682, 0.314, np.nan, 0.996, 0.471, 0.130] + + [np.nan, 0.743, 0.642, 0.031, np.nan, 0.761], + [0.695, 0.315, 0.997, np.nan, 0.406, 0.194] + + [0.741, np.nan, 0.550, 0.028, 0.756, np.nan], + ] + ), + atol=0.02, ) association_matrix = iris_inspector_multi_class.feature_association_matrix( clustered=False ) - assert np.hstack([m.values for m in association_matrix]) == ( - pytest.approx( - np.array( - [ - [1.000, 0.077, 0.662, 0.670, 1.000, 0.046] - + [0.370, 0.334, 1.000, 0.031, 0.634, 0.550], - [0.077, 1.000, 0.301, 0.295, 0.046, 1.000] - + [0.127, 0.173, 0.031, 1.000, 0.025, 0.020], - [0.662, 0.301, 1.000, 0.998, 0.370, 0.127] - + [1.000, 0.783, 0.634, 0.025, 1.000, 0.790], - [0.670, 0.295, 0.998, 1.000, 0.334, 0.173] - + [0.783, 1.000, 0.550, 0.020, 0.790, 1.000], - ] - ), - abs=0.02, - ) + assert_allclose( + np.hstack([m.values for m in 
association_matrix]), + np.array( + [ + [np.nan, 0.077, 0.662, 0.670, np.nan, 0.046] + + [0.370, 0.334, np.nan, 0.031, 0.634, 0.550], + [0.077, np.nan, 0.301, 0.295, 0.046, np.nan] + + [0.127, 0.173, 0.031, np.nan, 0.025, 0.020], + [0.662, 0.301, np.nan, 0.998, 0.370, 0.127] + + [np.nan, 0.783, 0.634, 0.025, np.nan, 0.790], + [0.670, 0.295, 0.998, np.nan, 0.334, 0.173] + + [0.783, np.nan, 0.550, 0.020, 0.790, np.nan], + ] + ), + atol=0.02, ) except AssertionError as error: print_expected_matrix(error=error, split=True) @@ -495,204 +497,211 @@ def test_model_inspection_classifier_interaction( synergy_matrix = model_inspector.feature_synergy_matrix( clustered=False, symmetrical=True ) - assert synergy_matrix.values == pytest.approx( + assert_allclose( + synergy_matrix.values, np.array( [ - [1.000, 0.011, 0.006, 0.007], - [0.011, 1.000, 0.006, 0.007], - [0.006, 0.006, 1.000, 0.003], - [0.007, 0.007, 0.003, 1.000], + [np.nan, 0.011, 0.006, 0.007], + [0.011, np.nan, 0.006, 0.007], + [0.006, 0.006, np.nan, 0.003], + [0.007, 0.007, 0.003, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_synergy_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_synergy_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.001, 0.002, 0.001], - [0.001, 0.019, 0.000, 0.002], - [0.002, 0.000, 0.068, 0.002], - [0.001, 0.002, 0.002, 0.488], + [np.nan, 0.001, 0.002, 0.001], + [0.001, np.nan, 0.000, 0.002], + [0.002, 0.000, np.nan, 0.002], + [0.001, 0.002, 0.002, np.nan], ] ), - abs=0.02, + atol=0.02, ) synergy_matrix = model_inspector.feature_synergy_matrix(clustered=True) - assert synergy_matrix.values == pytest.approx( + assert_allclose( + synergy_matrix.values, np.array( [ - [1.000, 0.000, 0.001, 0.004], - [0.149, 1.000, 0.045, 0.157], - [0.040, 0.004, 1.000, 0.044], - [0.003, 0.001, 0.001, 1.000], + [np.nan, 0.000, 0.001, 0.004], + [0.149, np.nan, 0.045, 0.157], + [0.040, 0.004, 
np.nan, 0.044], + [0.003, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_synergy_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_synergy_matrix(absolute=True).values, np.array( [ - [0.425, 0.000, 0.000, 0.001], - [0.003, 0.019, 0.001, 0.003], - [0.003, 0.000, 0.068, 0.003], - [0.001, 0.000, 0.001, 0.488], + [np.nan, 0.000, 0.000, 0.001], + [0.003, np.nan, 0.001, 0.003], + [0.003, 0.000, np.nan, 0.003], + [0.001, 0.000, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_synergy_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_synergy_matrix(clustered=True).values, np.array( [ - [1.000, 0.000, 0.000, 0.001], - [0.386, 1.000, 0.108, 0.314], - [0.005, 0.002, 1.000, 0.059], - [0.002, 0.000, 0.001, 1.000], + [np.nan, 0.000, 0.000, 0.001], + [0.386, np.nan, 0.108, 0.314], + [0.005, 0.002, np.nan, 0.059], + [0.002, 0.000, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = model_inspector.feature_redundancy_matrix( clustered=False, symmetrical=True ) - assert redundancy_matrix.values == pytest.approx( + assert_allclose( + redundancy_matrix.values, np.array( [ - [1.000, 0.080, 0.316, 0.208], - [0.080, 1.000, 0.036, 0.044], - [0.316, 0.036, 1.000, 0.691], - [0.208, 0.044, 0.691, 1.000], + [np.nan, 0.080, 0.316, 0.208], + [0.080, np.nan, 0.036, 0.044], + [0.316, 0.036, np.nan, 0.691], + [0.208, 0.044, 0.691, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_redundancy_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_redundancy_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.316, 0.052, 0.010], - [0.316, 0.488, 0.087, 0.009], - [0.052, 0.087, 0.068, 0.004], - [0.010, 0.009, 0.004, 0.019], + [np.nan, 0.316, 0.052, 0.010], + [0.316, np.nan, 0.087, 
0.009], + [0.052, 0.087, np.nan, 0.004], + [0.010, 0.009, 0.004, np.nan], ] ), - abs=0.02, + atol=0.02, ) redundancy_matrix = model_inspector.feature_redundancy_matrix(clustered=True) - assert redundancy_matrix.values == pytest.approx( + assert_allclose( + redundancy_matrix.values, np.array( [ - [1.000, 0.691, 0.209, 0.045], - [0.692, 1.000, 0.317, 0.037], - [0.201, 0.303, 1.000, 0.081], - [0.040, 0.031, 0.076, 1.000], + [np.nan, 0.691, 0.209, 0.045], + [0.692, np.nan, 0.317, 0.037], + [0.201, 0.303, np.nan, 0.081], + [0.040, 0.031, 0.076, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_redundancy_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_redundancy_matrix(absolute=True).values, np.array( [ - [0.425, 0.294, 0.092, 0.020], - [0.337, 0.488, 0.154, 0.017], - [0.013, 0.020, 0.068, 0.006], - [0.001, 0.001, 0.001, 0.019], + [np.nan, 0.294, 0.092, 0.020], + [0.337, np.nan, 0.154, 0.017], + [0.013, 0.020, np.nan, 0.006], + [0.001, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_redundancy_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_redundancy_matrix( + clustered=True + ).values, np.array( [ - [1.000, 0.677, 0.384, 0.003], - [0.676, 1.000, 0.465, 0.000], - [0.382, 0.438, 1.000, 0.013], - [0.002, 0.000, 0.012, 1.000], + [np.nan, 0.677, 0.384, 0.003], + [0.676, np.nan, 0.465, 0.000], + [0.382, 0.438, np.nan, 0.013], + [0.002, 0.000, 0.012, np.nan], ] ), - abs=0.02, + atol=0.02, ) association_matrix = model_inspector.feature_association_matrix( clustered=False, symmetrical=True ) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.074, 0.309, 0.205], - [0.074, 1.000, 0.030, 0.040], - [0.309, 0.030, 1.000, 0.694], - [0.205, 0.040, 0.694, 1.000], + [np.nan, 0.074, 0.309, 0.205], + [0.074, np.nan, 0.030, 
0.040], + [0.309, 0.030, np.nan, 0.694], + [0.205, 0.040, 0.694, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_association_matrix( - absolute=True, symmetrical=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_association_matrix( + absolute=True, symmetrical=True + ).values, np.array( [ - [0.425, 0.317, 0.051, 0.009], - [0.317, 0.488, 0.085, 0.007], - [0.051, 0.085, 0.068, 0.003], - [0.009, 0.007, 0.003, 0.019], + [np.nan, 0.317, 0.051, 0.009], + [0.317, np.nan, 0.085, 0.007], + [0.051, 0.085, np.nan, 0.003], + [0.009, 0.007, 0.003, np.nan], ] ), - abs=0.02, + atol=0.02, ) association_matrix = model_inspector.feature_association_matrix(clustered=True) - assert association_matrix.values == pytest.approx( + assert_allclose( + association_matrix.values, np.array( [ - [1.000, 0.694, 0.205, 0.040], - [0.694, 1.000, 0.309, 0.030], - [0.205, 0.309, 1.000, 0.074], - [0.040, 0.030, 0.074, 1.000], + [np.nan, 0.694, 0.205, 0.040], + [0.694, np.nan, 0.309, 0.030], + [0.205, 0.309, np.nan, 0.074], + [0.040, 0.030, 0.074, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector.feature_association_matrix( - absolute=True - ).values == pytest.approx( + assert_allclose( + model_inspector.feature_association_matrix(absolute=True).values, np.array( [ - [0.425, 0.295, 0.090, 0.018], - [0.338, 0.488, 0.150, 0.014], - [0.013, 0.020, 0.068, 0.005], - [0.001, 0.001, 0.001, 0.019], + [np.nan, 0.295, 0.090, 0.018], + [0.338, np.nan, 0.150, 0.014], + [0.013, 0.020, np.nan, 0.005], + [0.001, 0.001, 0.001, np.nan], ] ), - abs=0.02, + atol=0.02, ) - assert model_inspector_full_sample.feature_association_matrix( - clustered=True - ).values == pytest.approx( + assert_allclose( + model_inspector_full_sample.feature_association_matrix( + clustered=True + ).values, np.array( [ - [1.000, 0.678, 0.383, 0.001], - [0.678, 1.000, 0.447, 0.000], - [0.383, 0.447, 1.000, 0.009], - [0.001, 0.000, 0.009, 1.000], + [np.nan, 0.678, 0.383, 
0.001], + [0.678, np.nan, 0.447, 0.000], + [0.383, 0.447, np.nan, 0.009], + [0.001, 0.000, 0.009, np.nan], ] ), - abs=0.02, + atol=0.02, ) except AssertionError as error: @@ -761,12 +770,14 @@ def test_shap_plot_data( def print_expected_matrix(error: AssertionError, split: bool = False): - # used to print expected output for copy/paste into assertion statement + # print expected output for copy/paste into assertion statement import re matrix: List[List[float]] = eval( - re.search(r"array\(([^)]+)\)", error.args[0])[1].replace(r"\n", "\n") + re.search(r"array\(([^)]+)\)", error.args[0])[1] + .replace(r"\n", "\n") + .replace("nan", "np.nan") ) print("==== matrix assertion failed ====\nExpected Matrix:") @@ -778,7 +789,7 @@ def print_expected_matrix(error: AssertionError, split: bool = False): if split and i == halfpoint: txt += "] + [" elif i > 0: - txt += "," - txt += f"{x:.3f}" + txt += ", " + txt += "np.nan" if np.isnan(x) else f"{x:.3f}" print(txt + "],") print("]") diff --git a/test/test/facet/test_shap_decomposition.py b/test/test/facet/test_shap_decomposition.py index f2591cd6..5ec3fe89 100644 --- a/test/test/facet/test_shap_decomposition.py +++ b/test/test/facet/test_shap_decomposition.py @@ -32,17 +32,13 @@ def test_shap_decomposition_matrices( ): matrix_full_name = f"feature {matrix_name} matrix" n_features = len(feature_names) - assert len(matrix) == n_features, f"rows in {matrix_full_name}" - assert len(matrix.columns) == n_features, f"columns in {matrix_full_name}" + assert matrix.values.shape[0] == n_features, f"rows in {matrix_full_name}" + assert matrix.values.shape[1] == n_features, f"columns in {matrix_full_name}" # check values - for c in matrix.columns: - assert ( - 0.0 - <= matrix.fillna(0).loc[:, c].min() - <= matrix.fillna(0).loc[:, c].max() - <= 1.0 - ), f"Values of [0.0, 1.0] in {matrix_full_name}" + assert ( + np.nanmin(matrix.values) >= 0.0 and np.nanmax(matrix.values) <= 1.0 + ), f"Values of [0.0, 1.0] in {matrix_full_name}" #