Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move SHAP explainers out of experimental #3596

Merged
merged 20 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
32487bc
ENH Plot tests and test cleanups
dantegd Mar 10, 2021
b61f6a0
DBG Add conda-forge shap to CI for testing
dantegd Mar 10, 2021
f5e188d
FIX comment print to fix pep8
dantegd Mar 10, 2021
a1b5583
DBG Add pip shap to CI for testing
dantegd Mar 10, 2021
af3dc79
Merge branch '019-enh-shap-plots' of github.com:dantegd/cuml into 019…
dantegd Mar 10, 2021
ed059e5
Merge branch-0.19 into 019-enh-shap-plots
dantegd Mar 25, 2021
6c55212
ENH Multiple enhancements
dantegd Mar 25, 2021
0a377d5
Merge branch 'branch-0.19' of https://github.com/rapidsai/cuml into 0…
dantegd Mar 30, 2021
b29eaed
ENH Move explainers out of experimental
dantegd Mar 30, 2021
6f2dfaa
FIX Remove not needed function
dantegd Mar 30, 2021
d5761fe
ENH Multiple enhancements, corrections to remove experimental and add…
dantegd Apr 1, 2021
688b2a0
FIX The smallest copyright fix so far...
dantegd Apr 1, 2021
68475ba
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
8b467d2
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
e48ddaf
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
0b5a4b0
Update python/cuml/explainer/permutation_shap.pyx
dantegd Apr 1, 2021
8785f19
FIX Add more samples to pytests and fix a bug in permutation shap
dantegd Apr 1, 2021
3c51ae1
FIX gpu ci build script and stray print
dantegd Apr 2, 2021
d834754
Merge branch 'branch-0.19' of https://github.com/rapidsai/cuml into 0…
dantegd Apr 2, 2021
4b10e49
FIX temporarily xfail hellinger pytest
dantegd Apr 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions python/cuml/explainer/kernel_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,13 @@ class KernelExplainer(SHAPBase):
CPU based models, where speedups can still be achieved, but those can be
capped by factors like data transfers and the speed of the models.


KenelExplainer is algorithmically similar and based on the Python SHAP
package kernel explainer:
KernelExplainer is based on the Python SHAP
package's KernelExplainer class:
https://github.com/slundberg/shap/blob/master/shap/explainers/_kernel.py

Current characteristics of the GPU version:

* Unlike the SHAP package, nsamples is a parameter at the
* Unlike the SHAP package, ``nsamples`` is a parameter at the
initialization of the explainer and there is a small initialization
time.
* Only tabular data is supported for now, via passing the background
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/explainer/permutation_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ cdef extern from "cuml/explainer/permutation_shap.hpp" namespace "ML":

class PermutationExplainer(SHAPBase):
"""
GPU accelerated of SHAP's permutation explainer
GPU accelerated version of SHAP's PermutationExplainer

cuML's SHAP based explainers accelerate the algorithmic part of SHAP.
They are optimized to be used with fast GPU based models, like those in
Expand Down Expand Up @@ -344,6 +344,7 @@ class PermutationExplainer(SHAPBase):

self.handle.sync()

shap_values[0][idx] = shap_values[0][idx] / (2 * npermutations)
for i in range(self.model_dimensions):
shap_values[i][idx] = shap_values[i][idx] / (2 * npermutations)

self.total_time = self.total_time + (time.time() - total_timer)
4 changes: 2 additions & 2 deletions python/cuml/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def exact_shap_regression_dataset():
return create_synthetic_dataset(generator=skl_make_reg,
n_samples=101,
n_features=11,
test_size=1,
test_size=3,
random_state_generator=42,
random_state_train_test_split=42,
noise=0.1)
Expand All @@ -109,6 +109,6 @@ def exact_shap_classification_dataset():
return create_synthetic_dataset(generator=skl_make_clas,
n_samples=101,
n_features=11,
test_size=1,
test_size=3,
random_state_generator=42,
random_state_train_test_split=42)
48 changes: 34 additions & 14 deletions python/cuml/test/explainer/test_explainer_kernel_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,14 @@ def test_exact_regression_datasets(exact_shap_regression_dataset, model):
explained_dataset=X_test,
explainer=KernelExplainer
)

assert_and_log(
shap_values,
golden_regression_results[model],
mod.predict(X_test),
explainer.expected_value
)
for i in range(3):
print(i)
assert_and_log(
shap_values[i],
golden_regression_results[model][i],
mod.predict(X_test[i].reshape(1, X_test.shape[1])),
explainer.expected_value
)


def test_exact_classification_datasets(exact_shap_classification_dataset):
Expand Down Expand Up @@ -331,17 +332,36 @@ def test_l1_regularization(exact_shap_regression_dataset, l1_type):
# and confirmed with SHAP package.
golden_regression_results = {
cuml.LinearRegression: [
-3.6001968e-01, -1.0214063e+02, 1.2992077e+00, -6.3079113e+01,
2.5177002e-04, -2.3135548e+00, -1.0176431e+02, 3.3992329e+00,
4.1034698e+01, 7.1334076e+01, -1.6048431e+00
[-1.3628216e+00, -1.0234555e+02, 1.3433075e-01, -6.1763966e+01,
2.6035309e-04, -3.4455872e+00, -1.0159061e+02, 3.4058199e+00,
4.1598396e+01, 7.2152481e+01, -2.1964417e+00],
[-8.6558792e+01, 8.9456577e+00, -3.6405910e+01, 1.0574381e+01,
-4.1580200e-04, -5.8939896e+01, 4.8407948e+01, 1.4475842e+00,
-2.0742226e+01, 6.6378265e+01, -3.5134201e+01],
[-1.3722158e+01, -2.9430325e+01, -8.0079269e+01, 1.2096907e+02,
1.0681152e-03, -5.4266449e+01, -3.1012087e+01, -7.9640961e-01,
7.7072838e+01, 1.5370981e+01, -2.4032040e+01]
],
cuml.KNeighborsRegressor: [
3.3001919, -46.435326, -5.2908664, -34.01667, -5.917948, -14.939089,
-46.88066, -3.1448324, 11.431797, 49.297226, 5.9906464
[4.3210926, -47.497078, -4.523407, -35.49657, -5.5174675, -14.158726,
-51.303787, -2.6457424, 12.230529, 52.345207, 6.3014755],
[-52.036957, 2.4158602, -20.302296, 15.428952, 5.9823637,
-20.046719, 22.46046, -4.762917, -6.20145, 37.457417,
5.3511925],
[-8.803419, -7.4095736, -48.113777, 57.21296, 1.0490589,
-37.94751, -20.748789, -0.22258139, 28.204493, 4.5492225,
0.5797138]
],
cuml.SVR: [
0.04022658, -1.019261, 0.03412837, -0.7708928, -0.01342008,
-0.10700871, -1.2565054, 0.49404335, 0.4250477, 1.0444777, 0.01112604
[3.53810340e-02, -8.11021507e-01, 3.34369540e-02, -8.68727207e-01,
1.06804073e-03, -1.14741415e-01, -1.35545099e+00, 3.87545109e-01,
4.43311602e-01, 1.08623052e+00, 2.65314579e-02],
[-1.39247358e+00, 5.91157824e-02, -4.33764964e-01, 1.04503572e-01,
-4.41753864e-03, -1.09017754e+00, 5.90143979e-01, 1.08445108e-01,
-2.26831138e-01, 9.69056726e-01, -1.18437767e-01],
[-1.28573015e-01, -2.33658075e-01, -1.02735841e+00, 1.47447693e+00,
-1.99043751e-03, -1.11328888e+00, -4.66209412e-01, -1.02243885e-01,
8.18460345e-01, 2.20144764e-01, -9.62769389e-02]
]
}

Expand Down
32 changes: 19 additions & 13 deletions python/cuml/test/explainer/test_explainer_permutation_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_regression_datasets(exact_shap_regression_dataset, model):
fx = mod.predict(X_test)
exp_v = explainer.expected_value

assert (np.sum(cp.asnumpy(shap_values)) - abs(fx - exp_v)) <= 1e-5
for i in range(3):
assert (np.sum(
cp.asnumpy(shap_values[i])) - abs(fx[i] - exp_v)) <= 1e-5


def test_exact_classification_datasets(exact_shap_classification_dataset):
Expand All @@ -71,16 +73,18 @@ def test_exact_classification_datasets(exact_shap_classification_dataset):
model=mod.predict_proba,
background_dataset=X_train,
explained_dataset=X_test,
explainer=PermutationExplainer
explainer=PermutationExplainer,
)

fx = mod.predict_proba(X_test)[0]
fx = mod.predict_proba(X_test)
exp_v = explainer.expected_value

assert (np.sum(cp.asnumpy(
shap_values[0])) - abs(fx[0] - exp_v[0])) <= 1e-5
assert (np.sum(cp.asnumpy(
shap_values[1])) - abs(fx[1] - exp_v[1])) <= 1e-5
for i in range(3):
print(i, fx[i][1], shap_values[1][i])
assert (np.sum(cp.asnumpy(
shap_values[0][i])) - abs(fx[i][0] - exp_v[0])) <= 1e-5
assert (np.sum(cp.asnumpy(
shap_values[1][i])) - abs(fx[i][1] - exp_v[1])) <= 1e-5


@pytest.mark.parametrize("dtype", [np.float32, np.float64])
Expand All @@ -90,7 +94,7 @@ def test_exact_classification_datasets(exact_shap_classification_dataset):
cuml.SVR])
@pytest.mark.parametrize("npermutations", [5, 50])
def test_different_parameters(dtype, n_features, n_background, model,
npermutations, ):
npermutations):
cp.random.seed(42)
X_train, X_test, y_train, y_test = create_synthetic_dataset(
n_samples=n_background + 5,
Expand Down Expand Up @@ -135,19 +139,21 @@ def test_not_shuffled_explanation(exact_shap_regression_dataset):
data=X_train)

shap_values = explainer.shap_values(
X_test,
X_test[0],
npermutations=1,
testing=True
)

print(shap_values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stray print


assert np.allclose(shap_values, not_shuffled_shap_values,
rtol=1e-04, atol=1e-04)


# Test against exact shap values for linear regression
# 1 permutation should give exact result
def test_permutation(exact_shap_regression_dataset):
X_train, X_test, y_train, y_test = exact_shap_regression_dataset
X_train, _, y_train, _ = exact_shap_regression_dataset
# Train arbitrary model to get some coefficients
mod = cuml.LinearRegression().fit(X_train, y_train)
# Single background and foreground instance
Expand Down Expand Up @@ -182,7 +188,7 @@ def test_permutation(exact_shap_regression_dataset):
# of SHAP's permutationExplainer that did not shuffle the indexes for the
# permutations, giving us a test of the calculations in our implementation
not_shuffled_shap_values = [
-3.60017776e-01, -1.02140656e+02, 1.29915714e+00, -6.30791473e+01,
2.47955322e-04, -2.31356430e+00, -1.01764305e+02, 3.39929199e+00,
4.10347061e+01, 7.13340759e+01, -1.60478973e+00
-1.3628101e+00, -1.0234560e+02, 1.3428497e-01, -6.1764000e+01,
2.6702881e-04, -3.4455948e+00, -1.0159061e+02, 3.4058895e+00,
4.1598404e+01, 7.2152489e+01, -2.1964169e+00,
]