From 1d0a58b911ecaa6ad12bccd3243cae8f8a6afbed Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Thu, 14 Jan 2021 01:03:16 -0500
Subject: [PATCH 1/8] add doc for interpretability

---
 doc/reference.rst             |  1 +
 doc/spec/interpretability.rst | 95 +++++++++++++++++++++++++++++++++++
 doc/spec/references.rst       |  7 ++-
 doc/spec/spec.rst             |  1 +
 econml/_shap.py               | 11 ++++
 5 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 doc/spec/interpretability.rst

diff --git a/doc/reference.rst b/doc/reference.rst
index 7913d741e..d77de3d17 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -27,6 +27,7 @@ Private Module Reference
     econml._ortho_learner
     econml._cate_estimator
     econml._causal_tree
+    econml._shap
     econml.dml._rlearner
     econml.grf._base_grf
     econml.grf._base_grftree

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
new file mode 100644
index 000000000..0601d0bd4
--- /dev/null
+++ b/doc/spec/interpretability.rst
@@ -0,0 +1,95 @@
+Interpretability
+================
+
+Our package offers multiple interpretability tools to better understand the final CATE model.
+
+
+Tree Interpreter
+----------------
+
+Tree Interpreter provides a presentation-ready summary of the key features that explain the biggest differences in responsiveness to an intervention.
+
+:class:`.SingleTreeCateInterpreter` trains a single shallow decision tree for the treatment effect :math:`\theta(X)` you learnt from any of
+our available CATE estimators on a small set of features :math:`X` that you are interested in learning heterogeneity from. The model will split on the cutoff
+points that maximize the treatment effect difference in each leaf. Finally each leaf will be a subgroup of customers respond to a treatment differently
+with other leaves.
+
+For instance:
+
+.. testsetup::
+
+    import numpy as np
+    X = np.random.choice(np.arange(5), size=(100, 3))
+    Y = np.random.normal(size=(100, 2))
+    y = np.random.normal(size=(100,))
+    T = np.random.choice(np.arange(3), size=(100, 2))
+    t = T[:, 0]
+    W = np.random.normal(size=(100, 2))
+
+
+.. testcode::
+
+    from econml.cate_interpreter import SingleTreeCateInterpreter
+    from econml.dml import LinearDML
+    est = LinearDML()
+    est.fit(y, t, X=X, W=W)
+    intrp = SingleTreeCateInterpreter(include_model_uncertainty=True, max_depth=2, min_samples_leaf=10)
+    # We interpret the CATE model's behavior based on the features used for heterogeneity
+    intrp.interpret(est, X)
+    # Plot the tree
+    intrp.plot(feature_names=['A', 'B', 'C'], fontsize=12)
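+    # Optionally persist the figure for a report -- a sketch that assumes
+    # plot() draws on the current matplotlib figure:
+    import matplotlib.pyplot as plt
+    plt.savefig('cate_tree.png')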
+
+
+Policy Interpreter
+------------------
+Policy Interpreter offers similar functionality but takes cost into consideration.
+
+Instead of training a tree model regressor on :math:`\theta(X)`, :class:`.SingleTreePolicyInterpreter` trains a tree model classifier by using whether the effect is above the cost as
+label. Finally we could make simple rules about what kind of customers we should target on in order to maximum the outcome of interest.
+
+
+For instance:
+
+.. testcode::
+
+    from econml.cate_interpreter import SingleTreePolicyInterpreter
+    # We find a tree-based treatment policy based on the CATE model
+    # sample_treatment_costs is the cost of treatment. Policy will treat if effect is above this cost.
+    intrp = SingleTreePolicyInterpreter(risk_level=0.05, max_depth=2, min_samples_leaf=1, min_impurity_decrease=.001)
+    intrp.interpret(est, X, sample_treatment_costs=0.2)
+    # Plot the tree
+    intrp.plot(feature_names=['A', 'B', 'C'], fontsize=12)
+
+
+SHAP
+----
+
+`SHAP <https://github.com/slundberg/shap>`_ is a popular open source library for interpreting black-box machine learning
+models using the Shapley values methodology (see e.g. [Lundberg2017]_).
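+
+For orientation, the Shapley value of a feature :math:`i` out of a feature set :math:`N`, for a
+payoff function :math:`v`, is the classical attribution from cooperative game theory (the standard
+definition; see [Lundberg2017]_ for how SHAP adapts it to model explanation):
+
+.. math::
+
+    \phi_i = \sum_{S \subseteq N \setminus \{i\}} \frac{|S|! \, (|N| - |S| - 1)!}{|N|!} \left( v(S \cup \{i\}) - v(S) \right)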
+
+Similar to how black-box predictive machine learning models can be explained with SHAP, we can also explain black-box effect
+heterogeneity models. This approach provides an explanation as to why a heterogeneous causal effect model produced larger or
+smaller effect values for particular segments of the population. Which were the features that led to such differentiation?
+This question is easy to address when the model is succinctly described, such as the case of linear heterogeneity models,
+where one can simply investigate the coefficients of the model. However, it becomes hard when one starts using more expressive
+models, such as Random Forests and Causal Forests, to model effect heterogeneity. SHAP values can be of immense help in
+understanding the leading factors of effect heterogeneity that the model picked up from the training data.
+
+Our package offers seamless integration with the SHAP library. Every CATE estimator has a method `shap_values`, which returns the
+SHAP value explanation of the estimator's output for every treatment and outcome pair. These values can then be visualized with
+the plethora of visualizations that the SHAP library offers. Moreover, whenever possible our library invokes fast specialized
+algorithms from the SHAP library, for each type of final model, which can greatly reduce computation times.
+
+For instance:
+
+.. testcode::
+
+    import shap
+    from econml.dml import CausalForestDML
+    est = CausalForestDML()
+    est.fit(Y, T, X=X, W=W)
+    shap_values = est.shap_values(X)
+    # local view: explain heterogeneity for a given observation
+    ind = 0
+    shap.plots.force(shap_values["Y0"]["T0"][ind]
+    # global view: explain heterogeneity for a sample of the dataset
+    shap.summary_plot(shap_values['Y0']['T0'])

diff --git a/doc/spec/references.rst b/doc/spec/references.rst
index 3ba507a85..0692af351 100644
--- a/doc/spec/references.rst
+++ b/doc/spec/references.rst
@@ -108,4 +108,9 @@ References
 .. [Friedberg2018]
     Friedberg, R., Tibshirani, J., Athey, S., & Wager, S. (2018).
     Local linear forests.
-    arXiv preprint arXiv:1807.11408.
\ No newline at end of file
+    arXiv preprint arXiv:1807.11408.
+
+.. [Lundberg2017]
+    Lundberg, S., Lee, S.-I. (2017).
+    A Unified Approach to Interpreting Model Predictions.
+    URL https://arxiv.org/abs/1705.07874
\ No newline at end of file

diff --git a/doc/spec/spec.rst b/doc/spec/spec.rst
index 33d689358..693854193 100644
--- a/doc/spec/spec.rst
+++ b/doc/spec/spec.rst
@@ -20,6 +20,7 @@ The EconML Python SDK, developed by the ALICE team at MSR New England, incorpora
     estimation
     estimation_iv
     inference
+    interpretability
     references

 .. todo::

diff --git a/econml/_shap.py b/econml/_shap.py
index 594e31213..b387d2d76 100644
--- a/econml/_shap.py
+++ b/econml/_shap.py
@@ -1,6 +1,17 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.

+"""Helper functions to get SHAP values for different CATE estimators.
+
+References
+----------
+Scott Lundberg, Su-In Lee (2017)
+    A Unified Approach to Interpreting Model Predictions.
+    NeurIPS, https://arxiv.org/abs/1705.07874
+
+
+"""
+
 import shap
 from collections import defaultdict
 import numpy as np

From 589cae923408735246243ca486a8a11d94428f17 Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Thu, 14 Jan 2021 01:28:23 -0500
Subject: [PATCH 2/8] fix doc test failure

---
 doc/spec/interpretability.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
index 0601d0bd4..d5ccadbdc 100644
--- a/doc/spec/interpretability.rst
+++ b/doc/spec/interpretability.rst
@@ -55,7 +55,7 @@ For instance:
     # We find a tree-based treatment policy based on the CATE model
     # sample_treatment_costs is the cost of treatment. Policy will treat if effect is above this cost.
     intrp = SingleTreePolicyInterpreter(risk_level=0.05, max_depth=2, min_samples_leaf=1, min_impurity_decrease=.001)
-    intrp.interpret(est, X, sample_treatment_costs=0.2)
+    intrp.interpret(est, X, sample_treatment_costs=0.05)
     # Plot the tree
     intrp.plot(feature_names=['A', 'B', 'C'], fontsize=12)
@@ -90,6 +90,6 @@ For instance:
     shap_values = est.shap_values(X)
     # local view: explain heterogeneity for a given observation
     ind = 0
-    shap.plots.force(shap_values["Y0"]["T0"][ind]
+    shap.plots.force(shap_values["Y0"]["T0"][ind], matplotlib=True)
     # global view: explain heterogeneity for a sample of the dataset
     shap.summary_plot(shap_values['Y0']['T0'])

From 8e927eb77b8a00edfd57bb151c1dce5df84246de Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Thu, 14 Jan 2021 13:52:20 -0500
Subject: [PATCH 3/8] address pr comments

---
 doc/spec/interpretability.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
index d5ccadbdc..40a366e9a 100644
--- a/doc/spec/interpretability.rst
+++ b/doc/spec/interpretability.rst
@@ -9,10 +9,10 @@ Tree Interpreter
 ----------------

 Tree Interpreter provides a presentation-ready summary of the key features that explain the biggest differences in responsiveness to an intervention.

-:class:`.SingleTreeCateInterpreter` trains a single shallow decision tree for the treatment effect :math:`\theta(X)` you learnt from any of
+:class:`.SingleTreeCateInterpreter` trains a single shallow decision tree for the treatment effect :math:`\theta(X)` you learned from any of
 our available CATE estimators on a small set of features :math:`X` that you are interested in learning heterogeneity from. The model will split on the cutoff
-points that maximize the treatment effect difference in each leaf. Finally each leaf will be a subgroup of customers respond to a treatment differently
-with other leaves.
+points that maximize the treatment effect difference in each leaf. Finally each leaf will be a subgroup of samples that respond to a treatment differently
+from other leaves.

 For instance:
@@ -43,8 +43,8 @@ Policy Interpreter
 ------------------
 Policy Interpreter offers similar functionality but takes cost into consideration.

-Instead of training a tree model regressor on :math:`\theta(X)`, :class:`.SingleTreePolicyInterpreter` trains a tree model classifier by using whether the effect is above the cost as
-label. Finally we could make simple rules about what kind of customers we should target on in order to maximum the outcome of interest.
+Instead of training a tree model regressor on :math:`\theta(X)`, :class:`.SingleTreePolicyInterpreter` trains a tree model classifier by using whether
+the effect is above the cost as label.
+the effect is above the cost as label. This results in simple rules to segment the samples in order to maximize the outcome of interest.

From 4786ea49574d06a55f91097ca18d098beac6dc45 Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Fri, 15 Jan 2021 12:12:02 -0500
Subject: [PATCH 4/8] address pr comment

---
 azure-pipelines.yml           | 3 ---
 doc/spec/interpretability.rst | 8 ++++++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 21d6b4bc0..d17a69d1e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -66,9 +66,6 @@ jobs:
   - script: 'pip install sklearn-contrib-lightning'
     displayName: 'Install lightning'

-  - script: 'pip install --force-reinstall --no-cache-dir shap'
-    displayName: 'Install public shap'
-
   - script: 'python setup.py build_sphinx -W'
     displayName: 'Build documentation'

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
index 40a366e9a..6350a6afe 100644
--- a/doc/spec/interpretability.rst
+++ b/doc/spec/interpretability.rst
@@ -43,8 +43,12 @@ Policy Interpreter
 ------------------
 Policy Interpreter offers similar functionality but takes cost into consideration.

-Instead of training a tree model regressor on :math:`\theta(X)`, :class:`.SingleTreePolicyInterpreter` trains a tree model classifier by using whether
-the effect is above the cost as label. This results in simple rules to segment the samples in order to maximize the outcome of interest.
+Instead of fitting a tree to learn groups that have a different treatment effect, :class:`.SingleTreePolicyInterpreter` tries to split the samples into different treatment groups.
+So in the case of binary treatments, it tries to create sub-groups such that the samples within each group either all have a positive effect or all have a negative effect. Thus it tries to
+separate responders from non-responders, as opposed to trying to find groups that have different levels of response.
+
+This way you can construct an interpretable personalized policy where you treat the groups with a positive effect and don't treat the groups with a negative effect.
+Our policy tree provides the recommended treatment at each leaf node.
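+
+As a rough sketch of the decision rule such a policy tree approximates (reusing the estimator and
+data from the snippets above; the cost of 0.2 is an assumed illustrative constant, not an output of
+the interpreter), you would treat exactly the samples whose estimated effect exceeds the cost:
+
+.. testcode::
+
+    effects = est.effect(X)          # point estimates of the treatment effect per sample
+    recommend_treat = effects > 0.2  # treat whenever the estimated effect beats the assumed cost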

 For instance:

From 2215619f80b80fd055cd43152ba0bfa4c4c8c7f1 Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Fri, 15 Jan 2021 12:29:54 -0500
Subject: [PATCH 5/8] add shap dependency back and change doc test code to work with this version of shap

---
 azure-pipelines.yml           | 3 +++
 doc/spec/interpretability.rst | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index d17a69d1e..21d6b4bc0 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -66,6 +66,9 @@ jobs:
   - script: 'pip install sklearn-contrib-lightning'
     displayName: 'Install lightning'

+  - script: 'pip install --force-reinstall --no-cache-dir shap'
+    displayName: 'Install public shap'
+
   - script: 'python setup.py build_sphinx -W'
     displayName: 'Build documentation'

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
index 6350a6afe..78ecddeaf 100644
--- a/doc/spec/interpretability.rst
+++ b/doc/spec/interpretability.rst
@@ -88,8 +88,8 @@ For instance:

 .. testcode::

     import shap
-    from econml.dml import CausalForestDML
-    est = CausalForestDML()
+    from econml.dml import LinearDML
+    est = LinearDML()
     est.fit(Y, T, X=X, W=W)

From 5539cc2576aeda6f3dd43c6db12c47631a8623cf Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Fri, 15 Jan 2021 14:03:51 -0500
Subject: [PATCH 6/8] change dependency of shap on doc test

---
 azure-pipelines.yml           | 4 ++--
 doc/spec/interpretability.rst | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 21d6b4bc0..aae60beec 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -66,8 +66,8 @@ jobs:
   - script: 'pip install sklearn-contrib-lightning'
     displayName: 'Install lightning'

-  - script: 'pip install --force-reinstall --no-cache-dir shap'
-    displayName: 'Install public shap'
+  - script: 'pip install git+https://github.com/slundberg/shap.git@d1d2700acc0259f211934373826d5ff71ad514de'
+    displayName: 'Install specific version of shap'

   - script: 'python setup.py build_sphinx -W'
     displayName: 'Build documentation'

diff --git a/doc/spec/interpretability.rst b/doc/spec/interpretability.rst
index 78ecddeaf..63f3cf790 100644
--- a/doc/spec/interpretability.rst
+++ b/doc/spec/interpretability.rst
@@ -58,8 +58,8 @@ For instance:
     from econml.cate_interpreter import SingleTreePolicyInterpreter
     # We find a tree-based treatment policy based on the CATE model
     # sample_treatment_costs is the cost of treatment. Policy will treat if effect is above this cost.
-    intrp = SingleTreePolicyInterpreter(risk_level=0.05, max_depth=2, min_samples_leaf=1, min_impurity_decrease=.001)
-    intrp.interpret(est, X, sample_treatment_costs=0.05)
+    intrp = SingleTreePolicyInterpreter(risk_level=None, max_depth=2, min_samples_leaf=1, min_impurity_decrease=.001)
+    intrp.interpret(est, X, sample_treatment_costs=0.02)
     # Plot the tree
     intrp.plot(feature_names=['A', 'B', 'C'], fontsize=12)
@@ -90,7 +90,7 @@ For instance:
     import shap
     from econml.dml import LinearDML
     est = LinearDML()
-    est.fit(Y, T, X=X, W=W)
+    est.fit(y, t, X=X, W=W)
     shap_values = est.shap_values(X)
     # local view: explain heterogeneity for a given observation
     ind = 0

From 6d8cd3e9fdb32a2b5ff566b6542a5e39841dc62a Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Fri, 15 Jan 2021 16:24:54 -0500
Subject: [PATCH 7/8] change dependency to the new release of shap

---
 azure-pipelines.yml | 5 +----
 setup.cfg           | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index aae60beec..5972cca25 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -64,10 +64,7 @@
     displayName: 'Install graphviz'

   - script: 'pip install sklearn-contrib-lightning'
-    displayName: 'Install lightning'
-
-  - script: 'pip install git+https://github.com/slundberg/shap.git@d1d2700acc0259f211934373826d5ff71ad514de'
-    displayName: 'Install specific version of shap'
+    displayName: 'Install lightning'

   - script: 'python setup.py build_sphinx -W'
     displayName: 'Build documentation'

diff --git a/setup.cfg b/setup.cfg
index 7ddd51631..6dc1fe85f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,7 +50,7 @@ install_requires =
     graphviz
     matplotlib
     pandas < 1.1
-    shap @ git+https://github.com/slundberg/shap.git
+    shap ~= 0.38.1
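+    # '~=' is the PEP 440 compatible-release operator: this pin allows shap >= 0.38.1 but < 0.39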
 test_suite = econml.tests
 tests_require =
     pytest

From d8fc4d705236d4d00cd304a5331abf4d7c58551e Mon Sep 17 00:00:00 2001
From: Maggie Hei
Date: Fri, 15 Jan 2021 16:48:24 -0500
Subject: [PATCH 8/8] add shap dependency back on doc test

---
 azure-pipelines.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5972cca25..0e2163be5 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -64,7 +64,10 @@
     displayName: 'Install graphviz'

   - script: 'pip install sklearn-contrib-lightning'
-    displayName: 'Install lightning'
+    displayName: 'Install lightning'
+
+  - script: 'pip install git+https://github.com/slundberg/shap.git@d1d2700acc0259f211934373826d5ff71ad514de'
+    displayName: 'Install specific version of shap'

   - script: 'python setup.py build_sphinx -W'
     displayName: 'Build documentation'