# Various causal analysis fixes #468

Merged on May 19, 2021 (8 commits; changes shown from all commits).
**README.md** (11 changes: 7 additions & 4 deletions)
@@ -45,10 +45,12 @@ For information on use cases and background material on causal inference and het

# News

-**May 8, 2021:** Release v0.11.0, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.11.0)
+**May 18, 2021:** Release v0.11.1, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.11.1)

<details><summary>Previous releases</summary>

**May 8, 2021:** Release v0.11.0, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.11.0)

**March 22, 2021:** Release v0.10.0, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.10.0)

**March 11, 2021:** Release v0.9.2, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.9.2)
@@ -552,12 +554,13 @@ To see more complex examples, go to the [notebooks](https://github.com/Microsoft
You can get started by cloning this repository. We use
[setuptools](https://setuptools.readthedocs.io/en/latest/index.html) for building and distributing our package.
We rely on some recent features of setuptools, so make sure to upgrade to a recent version with
-`pip install setuptools --upgrade`. Then from your local copy of the repository you can run `python setup.py develop` to get started.
+`pip install setuptools --upgrade`. Then from your local copy of the repository you can run `pip install -e .` to get started. Depending on what you're doing, you may want to install with extras instead: use `pip install -e .[plt]` for matplotlib integration, or `pip install -e .[all]` to include all extras.

## Running the tests

-This project uses [pytest](https://docs.pytest.org/) for testing. To run tests locally after installing the package,
-you can use `python setup.py pytest`.
+This project uses [pytest](https://docs.pytest.org/) for testing. To run tests locally after installing the package, you can use `pip install pytest-runner` followed by `python setup.py pytest`.

We have added pytest marks to some tests to make it easier to run a subset, and you can set the PYTEST_ADDOPTS environment variable to take advantage of this. For instance, you can set it to `-m "not (notebook or automl)"` to skip notebook and automl tests that have some additional dependencies.
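The same filter can also be applied programmatically; the following is an illustrative sketch, not part of the repository:

```python
import pytest

# Run the suite while skipping tests marked 'notebook' or 'automl',
# mirroring PYTEST_ADDOPTS='-m "not (notebook or automl)"' in the shell.
exit_code = pytest.main(["-m", "not (notebook or automl)"])
```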

## Generating the documentation

**econml/__init__.py** (2 changes: 1 addition & 1 deletion)
@@ -26,4 +26,4 @@
'dowhy',
'__version__']

-__version__ = '0.11.0'
+__version__ = '0.11.1'
**econml/_ortho_learner.py** (8 changes: 4 additions & 4 deletions)
@@ -193,9 +193,9 @@ def predict(self, X, y, W=None):
return nuisances, model_list, np.sort(fitted_inds.astype(int)), (scores if calculate_scores else None)


-CachedValues = namedtuple('_CachedValues', ['nuisances',
-                                            'Y', 'T', 'X', 'W', 'Z', 'sample_weight', 'freq_weight',
-                                            'sample_var', 'groups'])
+CachedValues = namedtuple('CachedValues', ['nuisances',
+                                           'Y', 'T', 'X', 'W', 'Z', 'sample_weight', 'freq_weight',
+                                           'sample_var', 'groups'])
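The rename matters because pickle locates a namedtuple class by its typename in the defining module, so the typename must match the name the class is bound to; the same constraint motivates lifting `_result` to module level in `_causal_analysis.py` later in this diff. A minimal sketch of the failure mode (illustrative, not from the repository):

```python
import pickle
from collections import namedtuple

# Typename matches the module-level binding: instances pickle fine.
Good = namedtuple('Good', ['x'])

# Typename does not match the binding: pickle cannot find '_Bad' in
# this module, so pickle.dumps raises PicklingError.
Bad = namedtuple('_Bad', ['x'])

pickle.dumps(Good(1))
try:
    pickle.dumps(Bad(1))
except pickle.PicklingError as err:
    print(err)
```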


class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator):
@@ -236,7 +236,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator):

.. math ::
\\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))]\
-= \\frac{1}{n} \\sum_{i=1}^n \\sum_i \\ell(V_i; \\theta(X_i), \\hat{U}_i)
+= \\frac{1}{n} \\sum_{i=1}^n \\ell(V_i; \\theta(X_i), \\hat{U}_i)

The method is a bit more general in that the final step does not need to be a loss minimization step.
The class takes as input a model for fitting an estimate of the nuisance h given a set of samples
**econml/solutions/causal_analysis/_causal_analysis.py** (145 changes: 100 additions & 45 deletions)
@@ -45,6 +45,7 @@ class _CausalInsightsConstants:
CausalComputationTypeKey = 'causal_computation_type'
ConfoundingIntervalKey = 'confounding_interval'
ViewKey = 'view'
+InitArgsKey = 'init_args'

ALL = [RawFeatureNameKey,
EngineeredNameKey,
@@ -59,7 +60,8 @@ class _CausalInsightsConstants:
Version,
CausalComputationTypeKey,
ConfoundingIntervalKey,
-ViewKey]
+ViewKey,
+InitArgsKey]


def _get_default_shared_insights_output():
Expand All @@ -78,6 +80,7 @@ def _get_default_shared_insights_output():
_CausalInsightsConstants.Version: '1.0',
_CausalInsightsConstants.CausalComputationTypeKey: "simple",
_CausalInsightsConstants.ConfoundingIntervalKey: None,
+_CausalInsightsConstants.InitArgsKey: {}
}


@@ -198,6 +201,26 @@ def get_feature_names(self, names=None):
return rest


# Convert python objects to (possibly nested) types that can easily be represented as literals
def _sanitize(obj):
    if obj is None or isinstance(obj, (bool, int, str, float)):
        return obj
    elif isinstance(obj, dict):
        return {_sanitize(key): _sanitize(obj[key]) for key in obj}
    else:
        try:
            return [_sanitize(item) for item in obj]
        except Exception as e:
            raise ValueError(f"Could not sanitize input {obj}") from e


# named tuple type for storing results inside CausalAnalysis class;
# must be lifted to module level to enable pickling
_result = namedtuple("_result", field_names=[
"feature_index", "feature_name", "feature_baseline", "feature_levels", "hinds",
"X_transformer", "W_transformer", "estimator", "global_inference"])


class CausalAnalysis:
"""
Note: this class is experimental and the API may evolve over our next few releases.
@@ -248,10 +271,6 @@ class CausalAnalysis:
Degree of parallelism to use when training models via joblib.Parallel
"""

-_result_data = namedtuple("_result", field_names=[
-    "feature_index", "feature_name", "feature_baseline", "feature_levels", "hinds",
-    "X_transformer", "W_transformer", "estimator", "global_inference"])

def __init__(self, feature_inds, categorical, heterogeneity_inds=None, feature_names=None, classification=False,
upper_bound_on_cat_expansion=5, nuisance_models='linear', heterogeneity_model='linear', n_jobs=-1):
self.feature_inds = feature_inds
@@ -306,11 +325,13 @@ def fit(self, X, y, warm_start=False):
raise ValueError(
f"Can't warm start: previous X had {self._d_x} columns, new X has {X.shape[1]} columns")

-# TODO: implement check for upper bound on categoricals

# work with numeric feature indices, so that we can easily compare with categorical ones
train_inds = _get_column_indices(X, self.feature_inds)

if len(train_inds) == 0:
raise ValueError(
"No features specified. At least one feature index must be specified so that a model can be trained.")

heterogeneity_inds = self.heterogeneity_inds
if heterogeneity_inds is None:
heterogeneity_inds = [None for ind in train_inds]
@@ -348,7 +369,7 @@
new_inds = [ind for ind in train_inds if (ind not in self._cache or
heterogeneity_inds[ind] != self._cache[ind][1].hinds)]
else:
-new_inds = train_inds
+new_inds = list(train_inds)

self._cache = {} # store mapping from feature to insights, results

@@ -387,10 +408,39 @@
# start with empty results and default shared insights
self._results = []
self._shared = _get_default_shared_insights_output()
self._shared[_CausalInsightsConstants.InitArgsKey] = {
'feature_inds': _sanitize(self.feature_inds),
'categorical': _sanitize(self.categorical),
'heterogeneity_inds': _sanitize(self.heterogeneity_inds),
'feature_names': _sanitize(self.feature_names),
'classification': _sanitize(self.classification),
'upper_bound_on_cat_expansion': _sanitize(self.upper_bound_on_cat_expansion),
'nuisance_models': _sanitize(self.nuisance_models),
'heterogeneity_model': _sanitize(self.heterogeneity_model),
'n_jobs': _sanitize(self.n_jobs)
}

# convert categorical indicators to numeric indices
categorical_inds = _get_column_indices(X, self.categorical)

# check for indices over the categorical expansion bound
over_bound_inds = []
for ind in new_inds:
n_cats = len(np.unique(_safe_indexing(X, ind, axis=1)))
if ind in categorical_inds and n_cats > self.upper_bound_on_cat_expansion:
warnings.warn(f"Column {ind} has more than {self.upper_bound_on_cat_expansion} values "
"so no heterogeneity model will be fit for it; increase 'upper_bound_on_cat_expansion' "
"to change this behavior.")
# can't remove in place while iterating over new_inds, so store in separate list
over_bound_inds.append(ind)
for ind in over_bound_inds:
new_inds.remove(ind)
# also remove from train_inds so we don't try to access the result later
train_inds.remove(ind)
if len(train_inds) == 0:
raise ValueError("No features remain; increase the upper_bound_on_cat_expansion so that at least "
"one feature model can be trained.")

def process_feature(name, feat_ind):
discrete_treatment = feat_ind in categorical_inds
hinds = heterogeneity_inds[feat_ind]
@@ -487,15 +537,15 @@ def process_feature(name, feat_ind):
_CausalInsightsConstants.CategoricalColumnKey: [name],
_CausalInsightsConstants.EngineeredNameKey: [name]
}
-result = CausalAnalysis._result_data(feature_index=feat_ind,
-                                     feature_name=name,
-                                     feature_baseline=baseline,
-                                     feature_levels=cats,
-                                     hinds=hinds,
-                                     X_transformer=X_transformer,
-                                     W_transformer=W_transformer,
-                                     estimator=est,
-                                     global_inference=global_inference)
+result = _result(feature_index=feat_ind,
+                 feature_name=name,
+                 feature_baseline=baseline,
+                 feature_levels=cats,
+                 hinds=hinds,
+                 X_transformer=X_transformer,
+                 W_transformer=W_transformer,
+                 estimator=est,
+                 global_inference=global_inference)

return insights, result

@@ -947,10 +997,12 @@ def _tree(self, is_policy, Xtest, feature_index, *, treatment_cost=0,
if is_policy:
intrp.interpret(result.estimator, Xtest,
sample_treatment_costs=treatment_cost)
treat = intrp.treat(Xtest)
else: # no treatment cost for CATE trees
intrp.interpret(result.estimator, Xtest)
treat = None

-return intrp, result.X_transformer.get_feature_names(self.feature_names_), treatment_names
+return intrp, result.X_transformer.get_feature_names(self.feature_names_), treatment_names, treat

# TODO: it seems like it would be better to just return the tree itself rather than plot it;
# however, the tree can't store the feature and treatment names we compute here...
@@ -977,18 +1029,21 @@ def plot_policy_tree(self, Xtest, feature_index, *, treatment_cost=0,
Confidence level of the confidence intervals displayed in the leaf nodes.
A (1-alpha)*100% confidence interval is displayed.
"""
-intrp, feature_names, treatment_names = self._tree(True, Xtest, feature_index,
-                                                   treatment_cost=treatment_cost,
-                                                   max_depth=max_depth,
-                                                   min_samples_leaf=min_samples_leaf,
-                                                   min_impurity_decrease=min_value_increase,
-                                                   alpha=alpha)
+intrp, feature_names, treatment_names, _ = self._tree(True, Xtest, feature_index,
+                                                      treatment_cost=treatment_cost,
+                                                      max_depth=max_depth,
+                                                      min_samples_leaf=min_samples_leaf,
+                                                      min_impurity_decrease=min_value_increase,
+                                                      alpha=alpha)
return intrp.plot(feature_names=feature_names, treatment_names=treatment_names)

-def _policy_tree_string(self, Xtest, feature_index, *, treatment_cost=0,
+def _policy_tree_output(self, Xtest, feature_index, *, treatment_cost=0,
max_depth=3, min_samples_leaf=2, min_value_increase=1e-4, alpha=.1):
"""
-Get a recommended policy tree in graphviz format as a string.
+Get a tuple of policy outputs.
+
+The first item in the tuple is the recommended policy tree in graphviz format as a string.
+The second item is the recommended treatment for each sample as a list.

Parameters
----------
@@ -1010,18 +1065,18 @@ def _policy_tree_string(self, Xtest, feature_index, *, treatment_cost=0,

Returns
-------
-tree : string
-    The policy tree represented as a graphviz string
+tree : tuple of string, list of int
+    The policy tree represented as a graphviz string and the recommended treatment for each row
"""

-intrp, feature_names, treatment_names = self._tree(True, Xtest, feature_index,
-                                                   treatment_cost=treatment_cost,
-                                                   max_depth=max_depth,
-                                                   min_samples_leaf=min_samples_leaf,
-                                                   min_impurity_decrease=min_value_increase,
-                                                   alpha=alpha)
+intrp, feature_names, treatment_names, treat = self._tree(True, Xtest, feature_index,
+                                                          treatment_cost=treatment_cost,
+                                                          max_depth=max_depth,
+                                                          min_samples_leaf=min_samples_leaf,
+                                                          min_impurity_decrease=min_value_increase,
+                                                          alpha=alpha)
return intrp.export_graphviz(feature_names=feature_names,
-treatment_names=treatment_names)
+treatment_names=treatment_names), treat.tolist()
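A usage sketch for the new return value (hypothetical data and an assumed import path; the method is private, so its signature may change):

```python
import numpy as np
from econml.solutions.causal_analysis import CausalAnalysis  # import path assumed

rng = np.random.default_rng(0)
X = np.hstack([rng.integers(0, 2, size=(200, 1)),  # binary treatment feature
               rng.normal(size=(200, 3))])         # other covariates
y = X[:, 0] * (X[:, 1] > 0) + rng.normal(0, 0.1, size=(200,))

ca = CausalAnalysis(feature_inds=[0], categorical=[0])
ca.fit(X, y)

# First element: the policy tree as a graphviz (.dot) string;
# second element: the recommended treatment for each row of X.
tree_dot, treat = ca._policy_tree_output(X, 0)
print(treat[:10])
```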

# TODO: it seems like it would be better to just return the tree itself rather than plot it;
# however, the tree can't store the feature and treatment names we compute here...
@@ -1049,11 +1104,11 @@ def plot_heterogeneity_tree(self, Xtest, feature_index, *,
A (1-alpha)*100% confidence interval is displayed.
"""

-intrp, feature_names, treatment_names = self._tree(False, Xtest, feature_index,
-                                                   max_depth=max_depth,
-                                                   min_samples_leaf=min_samples_leaf,
-                                                   min_impurity_decrease=min_impurity_decrease,
-                                                   alpha=alpha)
+intrp, feature_names, treatment_names, _ = self._tree(False, Xtest, feature_index,
+                                                      max_depth=max_depth,
+                                                      min_samples_leaf=min_samples_leaf,
+                                                      min_impurity_decrease=min_impurity_decrease,
+                                                      alpha=alpha)
return intrp.plot(feature_names=feature_names,
treatment_names=treatment_names)

@@ -1081,10 +1136,10 @@ def _heterogeneity_tree_string(self, Xtest, feature_index, *,
A (1-alpha)*100% confidence interval is displayed.
"""

-intrp, feature_names, treatment_names = self._tree(False, Xtest, feature_index,
-                                                   max_depth=max_depth,
-                                                   min_samples_leaf=min_samples_leaf,
-                                                   min_impurity_decrease=min_impurity_decrease,
-                                                   alpha=alpha)
+intrp, feature_names, treatment_names, _ = self._tree(False, Xtest, feature_index,
+                                                      max_depth=max_depth,
+                                                      min_samples_leaf=min_samples_leaf,
+                                                      min_impurity_decrease=min_impurity_decrease,
+                                                      alpha=alpha)
return intrp.export_graphviz(feature_names=feature_names,
treatment_names=treatment_names)