Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move SHAP explainers out of experimental #3596

Merged
merged 20 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
32487bc
ENH Plot tests and test cleanups
dantegd Mar 10, 2021
b61f6a0
DBG Add conda-forge shap to CI for testing
dantegd Mar 10, 2021
f5e188d
FIX comment print to fix pep8
dantegd Mar 10, 2021
a1b5583
DBG Add pip shap to CI for testing
dantegd Mar 10, 2021
af3dc79
Merge branch '019-enh-shap-plots' of github.com:dantegd/cuml into 019…
dantegd Mar 10, 2021
ed059e5
Merge branch-0.19 into 019-enh-shap-plots
dantegd Mar 25, 2021
6c55212
ENH Multiple enhancements
dantegd Mar 25, 2021
0a377d5
Merge branch 'branch-0.19' of https://github.com/rapidsai/cuml into 0…
dantegd Mar 30, 2021
b29eaed
ENH Move explainers out of experimental
dantegd Mar 30, 2021
6f2dfaa
FIX Remove not needed function
dantegd Mar 30, 2021
d5761fe
ENH Multiple enhancements, corrections to remove experimental and add…
dantegd Apr 1, 2021
688b2a0
FIX The smallest copyright fix so far...
dantegd Apr 1, 2021
68475ba
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
8b467d2
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
e48ddaf
Update python/cuml/explainer/kernel_shap.pyx
dantegd Apr 1, 2021
0b5a4b0
Update python/cuml/explainer/permutation_shap.pyx
dantegd Apr 1, 2021
8785f19
FIX Add more samples to pytests and fix a bug in permutation shap
dantegd Apr 1, 2021
3c51ae1
FIX gpu ci build script and stray print
dantegd Apr 2, 2021
d834754
Merge branch 'branch-0.19' of https://github.com/rapidsai/cuml into 0…
dantegd Apr 2, 2021
4b10e49
FIX temporarily xfail hellinger pytest
dantegd Apr 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions python/cuml/explainer/kernel_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,13 @@ class KernelExplainer(SHAPBase):
CPU based models, where speedups can still be achieved, but those can be
capped by factors like data transfers and the speed of the models.


KenelExplainer is algorithmically similar and based on the Python SHAP
package kernel explainer:
KernelExplainer is based on the Python SHAP
package's KernelExplainer class:
https://github.com/slundberg/shap/blob/master/shap/explainers/_kernel.py

Current characteristics of the GPU version:

* Unlike the SHAP package, nsamples is a parameter at the
* Unlike the SHAP package, ``nsamples`` is a parameter at the
initialization of the explainer and there is a small initialization
time.
* Only tabular data is supported for now, via passing the background
Expand Down
5 changes: 3 additions & 2 deletions python/cuml/explainer/permutation_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ cdef extern from "cuml/explainer/permutation_shap.hpp" namespace "ML":

class PermutationExplainer(SHAPBase):
"""
GPU accelerated of SHAP's permutation explainer
GPU accelerated version of SHAP's PermutationExplainer

cuML's SHAP based explainers accelerate the algorithmic part of SHAP.
They are optimized to be used with fast GPU based models, like those in
Expand Down Expand Up @@ -344,6 +344,7 @@ class PermutationExplainer(SHAPBase):

self.handle.sync()

shap_values[0][idx] = shap_values[0][idx] / (2 * npermutations)
for i in range(self.model_dimensions):
shap_values[i][idx] = shap_values[i][idx] / (2 * npermutations)

self.total_time = self.total_time + (time.time() - total_timer)
4 changes: 2 additions & 2 deletions python/cuml/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def exact_shap_regression_dataset():
return create_synthetic_dataset(generator=skl_make_reg,
n_samples=101,
n_features=11,
test_size=1,
test_size=3,
random_state_generator=42,
random_state_train_test_split=42,
noise=0.1)
Expand All @@ -109,6 +109,6 @@ def exact_shap_classification_dataset():
return create_synthetic_dataset(generator=skl_make_clas,
n_samples=101,
n_features=11,
test_size=1,
test_size=3,
random_state_generator=42,
random_state_train_test_split=42)
48 changes: 34 additions & 14 deletions python/cuml/test/explainer/test_explainer_kernel_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,14 @@ def test_exact_regression_datasets(exact_shap_regression_dataset, model):
explained_dataset=X_test,
explainer=KernelExplainer
)

assert_and_log(
shap_values,
golden_regression_results[model],
mod.predict(X_test),
explainer.expected_value
)
for i in range(3):
print(i)
assert_and_log(
shap_values[i],
golden_regression_results[model][i],
mod.predict(X_test[i].reshape(1, X_test.shape[1])),
explainer.expected_value
)


def test_exact_classification_datasets(exact_shap_classification_dataset):
Expand Down Expand Up @@ -331,17 +332,36 @@ def test_l1_regularization(exact_shap_regression_dataset, l1_type):
# and confirmed with SHAP package.
golden_regression_results = {
cuml.LinearRegression: [
-3.6001968e-01, -1.0214063e+02, 1.2992077e+00, -6.3079113e+01,
2.5177002e-04, -2.3135548e+00, -1.0176431e+02, 3.3992329e+00,
4.1034698e+01, 7.1334076e+01, -1.6048431e+00
[-1.3628216e+00, -1.0234555e+02, 1.3433075e-01, -6.1763966e+01,
2.6035309e-04, -3.4455872e+00, -1.0159061e+02, 3.4058199e+00,
4.1598396e+01, 7.2152481e+01, -2.1964417e+00],
[-8.6558792e+01, 8.9456577e+00, -3.6405910e+01, 1.0574381e+01,
-4.1580200e-04, -5.8939896e+01, 4.8407948e+01, 1.4475842e+00,
-2.0742226e+01, 6.6378265e+01, -3.5134201e+01],
[-1.3722158e+01, -2.9430325e+01, -8.0079269e+01, 1.2096907e+02,
1.0681152e-03, -5.4266449e+01, -3.1012087e+01, -7.9640961e-01,
7.7072838e+01, 1.5370981e+01, -2.4032040e+01]
],
cuml.KNeighborsRegressor: [
3.3001919, -46.435326, -5.2908664, -34.01667, -5.917948, -14.939089,
-46.88066, -3.1448324, 11.431797, 49.297226, 5.9906464
[4.3210926, -47.497078, -4.523407, -35.49657, -5.5174675, -14.158726,
-51.303787, -2.6457424, 12.230529, 52.345207, 6.3014755],
[-52.036957, 2.4158602, -20.302296, 15.428952, 5.9823637,
-20.046719, 22.46046, -4.762917, -6.20145, 37.457417,
5.3511925],
[-8.803419, -7.4095736, -48.113777, 57.21296, 1.0490589,
-37.94751, -20.748789, -0.22258139, 28.204493, 4.5492225,
0.5797138]
],
cuml.SVR: [
0.04022658, -1.019261, 0.03412837, -0.7708928, -0.01342008,
-0.10700871, -1.2565054, 0.49404335, 0.4250477, 1.0444777, 0.01112604
[3.53810340e-02, -8.11021507e-01, 3.34369540e-02, -8.68727207e-01,
1.06804073e-03, -1.14741415e-01, -1.35545099e+00, 3.87545109e-01,
4.43311602e-01, 1.08623052e+00, 2.65314579e-02],
[-1.39247358e+00, 5.91157824e-02, -4.33764964e-01, 1.04503572e-01,
-4.41753864e-03, -1.09017754e+00, 5.90143979e-01, 1.08445108e-01,
-2.26831138e-01, 9.69056726e-01, -1.18437767e-01],
[-1.28573015e-01, -2.33658075e-01, -1.02735841e+00, 1.47447693e+00,
-1.99043751e-03, -1.11328888e+00, -4.66209412e-01, -1.02243885e-01,
8.18460345e-01, 2.20144764e-01, -9.62769389e-02]
]
}

Expand Down
32 changes: 19 additions & 13 deletions python/cuml/test/explainer/test_explainer_permutation_shap.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def test_regression_datasets(exact_shap_regression_dataset, model):
fx = mod.predict(X_test)
exp_v = explainer.expected_value

assert (np.sum(cp.asnumpy(shap_values)) - abs(fx - exp_v)) <= 1e-5
for i in range(3):
assert (np.sum(
cp.asnumpy(shap_values[i])) - abs(fx[i] - exp_v)) <= 1e-5


def test_exact_classification_datasets(exact_shap_classification_dataset):
Expand All @@ -71,16 +73,18 @@ def test_exact_classification_datasets(exact_shap_classification_dataset):
model=mod.predict_proba,
background_dataset=X_train,
explained_dataset=X_test,
explainer=PermutationExplainer
explainer=PermutationExplainer,
)

fx = mod.predict_proba(X_test)[0]
fx = mod.predict_proba(X_test)
exp_v = explainer.expected_value

assert (np.sum(cp.asnumpy(
shap_values[0])) - abs(fx[0] - exp_v[0])) <= 1e-5
assert (np.sum(cp.asnumpy(
shap_values[1])) - abs(fx[1] - exp_v[1])) <= 1e-5
for i in range(3):
print(i, fx[i][1], shap_values[1][i])
assert (np.sum(cp.asnumpy(
shap_values[0][i])) - abs(fx[i][0] - exp_v[0])) <= 1e-5
assert (np.sum(cp.asnumpy(
shap_values[1][i])) - abs(fx[i][1] - exp_v[1])) <= 1e-5


@pytest.mark.parametrize("dtype", [np.float32, np.float64])
Expand All @@ -90,7 +94,7 @@ def test_exact_classification_datasets(exact_shap_classification_dataset):
cuml.SVR])
@pytest.mark.parametrize("npermutations", [5, 50])
def test_different_parameters(dtype, n_features, n_background, model,
npermutations, ):
npermutations):
cp.random.seed(42)
X_train, X_test, y_train, y_test = create_synthetic_dataset(
n_samples=n_background + 5,
Expand Down Expand Up @@ -135,19 +139,21 @@ def test_not_shuffled_explanation(exact_shap_regression_dataset):
data=X_train)

shap_values = explainer.shap_values(
X_test,
X_test[0],
npermutations=1,
testing=True
)

print(shap_values)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stray print


assert np.allclose(shap_values, not_shuffled_shap_values,
rtol=1e-04, atol=1e-04)


# Test against exact shap values for linear regression
# 1 permutation should give exact result
def test_permutation(exact_shap_regression_dataset):
X_train, X_test, y_train, y_test = exact_shap_regression_dataset
X_train, _, y_train, _ = exact_shap_regression_dataset
# Train arbitrary model to get some coefficients
mod = cuml.LinearRegression().fit(X_train, y_train)
# Single background and foreground instance
Expand Down Expand Up @@ -182,7 +188,7 @@ def test_permutation(exact_shap_regression_dataset):
# of SHAP's permutationExplainer that did not shuffle the indexes for the
# permutations, giving us a test of the calculations in our implementation
not_shuffled_shap_values = [
-3.60017776e-01, -1.02140656e+02, 1.29915714e+00, -6.30791473e+01,
2.47955322e-04, -2.31356430e+00, -1.01764305e+02, 3.39929199e+00,
4.10347061e+01, 7.13340759e+01, -1.60478973e+00
-1.3628101e+00, -1.0234560e+02, 1.3428497e-01, -6.1764000e+01,
2.6702881e-04, -3.4455948e+00, -1.0159061e+02, 3.4058895e+00,
4.1598404e+01, 7.2152489e+01, -2.1964169e+00,
]