From 094bf251f61e3edb3667efcccdc56f1ab2d2bf7d Mon Sep 17 00:00:00 2001 From: Clive Cox Date: Tue, 1 Sep 2020 19:17:29 +0100 Subject: [PATCH] Add TreeShap explainer --- components/alibi-explain-server/Makefile | 11 + .../alibiexplainer/explainer.py | 6 +- .../alibiexplainer/parser.py | 45 +++ .../alibiexplainer/tree_shap.py | 28 ++ components/alibi-explain-server/setup.py | 4 +- .../tests/test_tree_shap.py | 25 ++ notebooks/explainer_examples.ipynb | 313 +++++++++++++++++- .../v1/seldondeployment_types.go | 1 + 8 files changed, 422 insertions(+), 11 deletions(-) create mode 100644 components/alibi-explain-server/alibiexplainer/tree_shap.py create mode 100644 components/alibi-explain-server/tests/test_tree_shap.py diff --git a/components/alibi-explain-server/Makefile b/components/alibi-explain-server/Makefile index f34d8fd6a9..fa7b81adbd 100644 --- a/components/alibi-explain-server/Makefile +++ b/components/alibi-explain-server/Makefile @@ -181,3 +181,14 @@ run_explainer_integratedgradients_docker: curl_explain_imdb: curl -d '{"data": {"ndarray":[[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 591, 202, 14, 31, 6, 717, 10, 10, 2, 2, 5, 4, 360, 7, 4, 177, 5760, 394, 354, 4, 123, 9, 1035, 1035, 1035, 10, 10, 13, 92, 124, 89, 488, 7944, 100, 28, 1668, 14, 31, 23, 27, 7479, 29, 220, 468, 8, 124, 14, 286, 170, 8, 157, 46, 5, 27, 239, 16, 179, 2, 38, 32, 25, 7944, 451, 202, 14, 6, 717]]}}' -X POST http://localhost:8080/api/v1.0/explain -H "Content-Type: application/json" + + +# +# Test Tree Shap +# + +run_explainer_treeshap: + python -m alibiexplainer --model_name adult --protocol seldon.http --storage_uri gs://seldon-models/xgboost/adult/tree_shap_py36_0.5.2 TreeShap + +run_explainer_treeshap_docker: + docker run --rm -d --name "explainer" --network=host -p 8080:8080 seldonio/${IMAGE}:${VERSION} --model_name adult --protocol seldon.http --storage_uri gs://seldon-models/xgboost/adult/tree_shap_py36_0.5.2 TreeShap diff 
--git a/components/alibi-explain-server/alibiexplainer/explainer.py b/components/alibi-explain-server/alibiexplainer/explainer.py index bba3196844..4038b435a6 100644 --- a/components/alibi-explain-server/alibiexplainer/explainer.py +++ b/components/alibi-explain-server/alibiexplainer/explainer.py @@ -26,6 +26,7 @@ from alibiexplainer.anchor_tabular import AnchorTabular from alibiexplainer.anchor_text import AnchorText from alibiexplainer.kernel_shap import KernelShap +from alibiexplainer.tree_shap import TreeShap from alibiexplainer.integrated_gradients import IntegratedGradients from alibiexplainer.explainer_wrapper import ExplainerWrapper from alibiexplainer.proto import prediction_pb2 @@ -59,6 +60,7 @@ class ExplainerMethod(Enum): anchor_text = "AnchorText" kernel_shap = "KernelShap" integrated_gradients = "IntegratedGradients" + tree_shap = "TreeShap" def __str__(self): return self.value @@ -93,6 +95,8 @@ def __init__(self, self.wrapper = KernelShap(self._predict_fn, explainer, **config) elif self.method is ExplainerMethod.integrated_gradients: self.wrapper = IntegratedGradients(keras_model, **config) + elif self.method is ExplainerMethod.tree_shap: + self.wrapper = TreeShap(explainer, **config) else: raise NotImplementedError @@ -135,7 +139,7 @@ def _predict_fn(self, arr: Union[np.ndarray, List]) -> np.ndarray: def explain(self, request: Dict) -> Any: if self.method is ExplainerMethod.anchor_tabular or self.method is ExplainerMethod.anchor_images or \ self.method is ExplainerMethod.anchor_text or self.method is ExplainerMethod.kernel_shap or \ - self.method is ExplainerMethod.integrated_gradients: + self.method is ExplainerMethod.integrated_gradients or self.method is ExplainerMethod.tree_shap: if self.protocol == Protocol.tensorflow_http: explanation: Explanation = self.wrapper.explain(request["instances"]) else: diff --git a/components/alibi-explain-server/alibiexplainer/parser.py b/components/alibi-explain-server/alibiexplainer/parser.py index 
5a62bd458d..650c14ea04 100644 --- a/components/alibi-explain-server/alibiexplainer/parser.py +++ b/components/alibi-explain-server/alibiexplainer/parser.py @@ -268,6 +268,51 @@ def parse_args(sys_args): dest="explainer.internal_batch_size", default=argparse.SUPPRESS, ) + + # TreeShap Arguments + parser_tree_shap = subparsers.add_parser(str(ExplainerMethod.tree_shap)) + addCommonParserArgs(parser_tree_shap) + + parser_tree_shap.add_argument( + "--interactions", + type=str2bool, + action=GroupedAction, + dest="explainer.interactions", + default=argparse.SUPPRESS, + ) + + parser_tree_shap.add_argument( + "--approximate", + type=str2bool, + action=GroupedAction, + dest="explainer.approximate", + default=argparse.SUPPRESS, + ) + + parser_tree_shap.add_argument( + "--check_additivity", + type=str2bool, + action=GroupedAction, + dest="explainer.check_additivity", + default=argparse.SUPPRESS, + ) + + parser_tree_shap.add_argument( + "--tree_limit", + type=int, + action=GroupedAction, + dest="explainer.tree_limit", + default=argparse.SUPPRESS, + ) + + parser_tree_shap.add_argument( + "--summarise_result", + type=str2bool, + action=GroupedAction, + dest="explainer.summarise_result", + default=argparse.SUPPRESS, + ) + args, _ = parser.parse_known_args(sys_args) argdDict = vars(args).copy() diff --git a/components/alibi-explain-server/alibiexplainer/tree_shap.py b/components/alibi-explain-server/alibiexplainer/tree_shap.py new file mode 100644 index 0000000000..0783cd49dd --- /dev/null +++ b/components/alibi-explain-server/alibiexplainer/tree_shap.py @@ -0,0 +1,28 @@ +import logging +import numpy as np +import alibi +from alibi.api.interfaces import Explanation +from alibiexplainer.explainer_wrapper import ExplainerWrapper +from alibiexplainer.constants import SELDON_LOGLEVEL +from typing import List, Optional + +logging.basicConfig(level=SELDON_LOGLEVEL) + + +class TreeShap(ExplainerWrapper): + def __init__( + self, + explainer: Optional[alibi.explainers.TreeShap], + **kwargs 
+ ): + if explainer is None: + raise Exception("Tree Shap requires a built explainer") + self.tree_shap = explainer + self.kwargs = kwargs + + def explain(self, inputs: List) -> Explanation: + arr = np.array(inputs) + logging.info("Tree Shap call with %s", self.kwargs) + logging.info("kernel shap data shape %s",arr.shape) + shap_exp = self.tree_shap.explain(arr, **self.kwargs) + return shap_exp \ No newline at end of file diff --git a/components/alibi-explain-server/setup.py b/components/alibi-explain-server/setup.py index 28d365565d..47d88aba25 100644 --- a/components/alibi-explain-server/setup.py +++ b/components/alibi-explain-server/setup.py @@ -38,7 +38,9 @@ "requests>=2.22.0", "joblib>=0.13.2", "dill>=0.3.0", - "grpcio>=1.22.0" + "grpcio>=1.22.0", + "xgboost==1.0.2", + "shap==0.35.0" ], tests_require=tests_require, extras_require={'test': tests_require} diff --git a/components/alibi-explain-server/tests/test_tree_shap.py b/components/alibi-explain-server/tests/test_tree_shap.py new file mode 100644 index 0000000000..ea43645c83 --- /dev/null +++ b/components/alibi-explain-server/tests/test_tree_shap.py @@ -0,0 +1,25 @@ +from alibiexplainer.tree_shap import TreeShap +import kfserving +import os +import dill +from alibi.datasets import fetch_adult +import numpy as np +import json +ADULT_EXPLAINER_URI = "gs://seldon-models/xgboost/adult/tree_shap_py36_0.5.2" +EXPLAINER_FILENAME = "explainer.dill" + + +def test_kernel_shap(): + os.environ.clear() + alibi_model = os.path.join( + kfserving.Storage.download(ADULT_EXPLAINER_URI), EXPLAINER_FILENAME + ) + with open(alibi_model, "rb") as f: + alibi_model = dill.load(f) + tree_shap = TreeShap(alibi_model) + adult = fetch_adult() + X_test = adult.data[30001:, :] + np.random.seed(0) + explanation = tree_shap.explain(X_test[0:1].tolist()) + exp_json = json.loads(explanation.to_json()) + print(exp_json) \ No newline at end of file diff --git a/notebooks/explainer_examples.ipynb b/notebooks/explainer_examples.ipynb index 
26fa3bec2c..1e96672d0b 100644 --- a/notebooks/explainer_examples.ipynb +++ b/notebooks/explainer_examples.ipynb @@ -11,20 +11,23 @@ "\n", "Seldon provides the following out-of-the-box pre-packaged explainers:\n", "* Anchor Tabular Explainer \n", - " * AI Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for tabular data\n", + " * A black box Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for tabular data\n", " * It basically answers the question of what are the most \"powerul\" or \"important\" features in a tabular prediction\n", "* Anchor Image Explainer\n", - " * AI Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for image data\n", + " * A black box Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for image data\n", " * It basically answers the question of what are the most \"powerul\" or \"important\" pixels in an image prediction\n", "* Anchor Text Explainer\n", - " * AI Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for text data\n", + " * A black box Explainer that uses the [anchor technique](https://docs.seldon.io/projects/alibi/en/latest/methods/Anchors.html) for text data\n", " * It basically answers the question of what are the most \"powerul\" or \"important\" tokens in a text prediction\n", - "* Counterfactual Explainer\n", - " * AI Explainer that uses the [counterfactual technique](https://docs.seldon.io/projects/alibi/en/latest/methods/CF.html) for any type of data\n", - " * It basically provides insight of what are the minimum changes you can do to an input to change the prediction to a different class\n", - "* Contrastive Explainer\n", - " * AI explainer that uses the [Contrastive 
Explanations](https://docs.seldon.io/projects/alibi/en/latest/methods/CEM.html) technique for any type of data\n", - " * It basically provides insights of what are the minimum changes you can do to an input to change the prediction to change the prediction or the minimum components of the input to make it the same prediction" + "* Kernel Shap Explainer\n", + " * A black box Explainer that uses the [kernel shap technique](https://docs.seldon.io/projects/alibi/en/latest/methods/KernelSHAP.html) for tabular data\n", + " * It provides positive and negative feature attributions that contributed to the predictions\n", + "* Integrated Gradient Explainer\n", + " * A white box explainer that uses the [Integrated Gradients technique](https://docs.seldon.io/projects/alibi/en/latest/methods/IntegratedGradients.html) for Keras models\n", + " * It provides importance values for each feature\n", + "* Tree Shap Explainer\n", + " * A white box explainer that uses the [TreeShap technique](https://docs.seldon.io/projects/alibi/en/latest/methods/TreeSHAP.html) for tree based models\n", + " * It provides positive and negative feature attributions that contributed to the predictions" ] }, { @@ -2058,6 +2061,298 @@ "!kubectl delete -f resources/mnist_rest_explainer.yaml" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XGBoost Model with TreeShap Explainer" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting resources/income_explainer.yaml\n" + ] + } + ], + "source": [ + "%%writefile resources/income_explainer.yaml\n", + "apiVersion: machinelearning.seldon.io/v1alpha2\n", + "kind: SeldonDeployment\n", + "metadata:\n", + " name: income\n", + "spec:\n", + " predictors:\n", + " - graph:\n", + " children: []\n", + " implementation: XGBOOST_SERVER\n", + " modelUri: gs://seldon-models/xgboost/adult/model_1.0.2\n", + " name: income-model\n", + " explainer:\n", + " 
type: TreeShap\n", + " modelUri: gs://seldon-models/xgboost/adult/tree_shap_py36_0.5.2\n", + " name: default\n", + " replicas: 1" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seldondeployment.machinelearning.seldon.io/income created\r\n" + ] + } + ], + "source": [ + "!kubectl apply -f resources/income_explainer.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting for deployment \"income-default-0-income-model\" rollout to finish: 0 of 1 updated replicas are available...\n", + "deployment \"income-default-0-income-model\" successfully rolled out\n" + ] + } + ], + "source": [ + "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=income -o jsonpath='{.items[0].metadata.name}')" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from seldon_core.seldon_client import SeldonClient\n", + "import numpy as np\n", + "sc = SeldonClient(deployment_name=\"income\",namespace=\"seldon\", gateway=\"ambassador\", gateway_endpoint=\"localhost:8003\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use python client library to get a prediction." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': {'names': [], 'tensor': {'shape': [1], 'values': [-1.2381880283355713]}}, 'meta': {}}\n" + ] + } + ], + "source": [ + "data = np.array([[52, 4, 0, 2, 8, 4, 2, 0, 0, 0, 60, 9]])\n", + "r = sc.predict(data=data)\n", + "print(r.response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use python client library to get an explanation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import json\n", + "data = np.array([[52, 4, 0, 2, 8, 4, 2, 0, 0, 0, 60, 9]])\n", + "res = sc.explain(deployment_name=\"income\", predictor=\"default\", data=data)\n", + "explanation = res.response\n", + "explanationStr = json.dumps(explanation)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "from alibi.api.interfaces import Explanation\n", + "explanation = Explanation.from_json(explanationStr)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "explanation.shap_values = np.array(explanation.shap_values)\n", + "explanation.raw[\"instances\"] = np.array(explanation.raw[\"instances\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "def decode_data(X, feature_names, category_map):\n", + " \"\"\"\n", + " Given an encoded data matrix `X` returns a matrix where the \n", + " categorical levels have been replaced by human readable categories.\n", + " \"\"\"\n", + " \n", + " # expect 2D array\n", + " if len(X.shape) == 1:\n", + " X = X.reshape(1, -1)\n", + " \n", + " X_new = np.zeros(X.shape, dtype=object)\n", + " # Check if a column is categorical and replace it with values from category map\n", + " for idx, name in enumerate(feature_names):\n", + " categories = category_map.get(str(idx), None)\n", + " if categories:\n", + " for j, category in enumerate(categories):\n", + " encoded_vals = X[:, idx] == j\n", + " X_new[encoded_vals, idx] = category\n", + " else:\n", + " X_new[:, idx] = X[:, idx]\n", + " \n", + " return X_new" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "decoded_features = decode_data(data,explanation.feature_names,explanation.categorical_names)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import shap\n", + "shap.initjs()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + " Visualization omitted, Javascript library not loaded!
\n", + " Have you run `initjs()` in this notebook? If this notebook was from another\n", + " user you must also trust this notebook (File -> Trust notebook). If you are viewing\n", + " this notebook on github the Javascript has been stripped for security. If you are using\n", + " JupyterLab this error is because a JupyterLab extension has not yet been written.\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shap.force_plot(\n", + " explanation.expected_value[0], # 0 is a class index but we have single-output model\n", + " explanation.shap_values[0][0, :] , \n", + " decoded_features, \n", + " explanation.feature_names,\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go index c75940820a..96d7a88ab4 100644 --- a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go +++ b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go @@ -265,6 +265,7 @@ const ( AlibiKernelShapExplainer AlibiExplainerType = "KernelShap" AlibiIntegratedGradientsExplainer AlibiExplainerType = "IntegratedGradients" AlibiALEExplainer AlibiExplainerType = "ALE" + AlibiTreeShap AlibiExplainerType = "TreeShap" ) type Explainer struct {