diff --git a/azure-pipelines-steps.yml b/azure-pipelines-steps.yml
index 22f0b66fb..78d43064e 100644
--- a/azure-pipelines-steps.yml
+++ b/azure-pipelines-steps.yml
@@ -53,6 +53,11 @@ jobs:
     displayName: 'Install graphviz on Linux'
     condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux'))
 
+  # Install OpenMP on Mac to support lightgbm
+  - script: 'brew install libomp'
+    displayName: 'Install OpenMP on Mac'
+    condition: and(succeeded(), eq(variables['Agent.OS'], 'Darwin'))
+
   # Install the package
   - script: 'python -m pip install --upgrade pip && pip install --upgrade setuptools wheel Cython && pip install ${{ parameters.package }}'
     displayName: 'Install dependencies'
diff --git a/econml/__init__.py b/econml/__init__.py
index ee333941f..55d7efa96 100644
--- a/econml/__init__.py
+++ b/econml/__init__.py
@@ -18,6 +18,7 @@
     'ortho_iv',
     'policy',
     'score',
+    'solutions',
     'sklearn_extensions',
     'tree',
     'two_stage_least_squares',
diff --git a/econml/inference/_inference.py b/econml/inference/_inference.py
index ac95aed69..ec9e823b3 100644
--- a/econml/inference/_inference.py
+++ b/econml/inference/_inference.py
@@ -867,6 +867,19 @@ def _expand_outputs(self, n_rows):
         """
         pass
 
+    def translate(self, offset):
+        """
+        Update the results in place by translating by an offset.
+
+        Parameters
+        ----------
+        offset: array-like
+            The offset by which to translate these results
+        """
+        # NOTE: use np.asarray(offset) because if offset is a pd.Series, direct addition would make the sum
+        # a Series as well, which would subsequently break summary_frame because flatten isn't supported
+        self.pred = self.pred + np.asarray(offset)
+
 
 class NormalInferenceResults(InferenceResults):
     """
@@ -1081,6 +1094,14 @@ def _expand_outputs(self, n_rows):
         return EmpiricalInferenceResults(self.d_t, self.d_y, pred, pred_dist, self.inf_type,
                                          self.fname_transformer, self.feature_names,
                                          self.output_names, self.treatment_names)
+
+    def translate(self, other):
+        # offset preds
+        super().translate(other)
+        # offset the distribution, too
+        self.pred_dist = self.pred_dist + np.asarray(other)
+
+    translate.__doc__ = InferenceResults.translate.__doc__
 
 
 class PopulationSummaryResults:
     """
diff --git a/econml/solutions/causal_analysis/__init__.py b/econml/solutions/causal_analysis/__init__.py
new file mode 100644
index 000000000..eb691b2fd
--- /dev/null
+++ b/econml/solutions/causal_analysis/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+from ._causal_analysis import CausalAnalysis
+
+__all__ = ["CausalAnalysis"]
diff --git a/econml/solutions/causal_analysis/_causal_analysis.py b/econml/solutions/causal_analysis/_causal_analysis.py
new file mode 100644
index 000000000..d4be23ec0
--- /dev/null
+++ b/econml/solutions/causal_analysis/_causal_analysis.py
@@ -0,0 +1,1090 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""Module for assessing causal feature importance."""
+
+import warnings
+from collections import OrderedDict, namedtuple
+
+import joblib
+import lightgbm as lgb
+import numpy as np
+import pandas as pd
+from sklearn.base import TransformerMixin
+from sklearn.compose import ColumnTransformer
+from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import Lasso, LassoCV, LogisticRegression, LogisticRegressionCV
+from sklearn.pipeline import make_pipeline, Pipeline
+from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler
+from sklearn.utils.validation import column_or_1d
+from ...cate_interpreter import SingleTreeCateInterpreter, SingleTreePolicyInterpreter
+from ...dml import LinearDML, CausalForestDML
+from ...inference import NormalInferenceResults
+from ...sklearn_extensions.linear_model import WeightedLasso
+from ...sklearn_extensions.model_selection import GridSearchCVList
+from ...utilities import _RegressionWrapper, inverse_onehot
+
+# TODO: this utility is documented but internal; reimplement?
+from sklearn.utils import _safe_indexing
+# TODO: this utility is even less public...
+from sklearn.utils import _get_column_indices
+
+
+class _CausalInsightsConstants:
+    RawFeatureNameKey = 'raw_name'
+    EngineeredNameKey = 'name'
+    CategoricalColumnKey = 'cat'
+    TypeKey = 'type'
+    PointEstimateKey = 'point'
+    StandardErrorKey = 'stderr'
+    ZStatKey = 'zstat'
+    ConfidenceIntervalLowerKey = 'ci_lower'
+    ConfidenceIntervalUpperKey = 'ci_upper'
+    PValueKey = 'p_value'
+    Version = 'version'
+    CausalComputationTypeKey = 'causal_computation_type'
+    ConfoundingIntervalKey = 'confounding_interval'
+    ViewKey = 'view'
+
+    ALL = [RawFeatureNameKey,
+           EngineeredNameKey,
+           CategoricalColumnKey,
+           TypeKey,
+           PointEstimateKey,
+           StandardErrorKey,
+           ZStatKey,
+           ConfidenceIntervalLowerKey,
+           ConfidenceIntervalUpperKey,
+           PValueKey,
+           Version,
+           CausalComputationTypeKey,
+           ConfoundingIntervalKey,
+           ViewKey]
+
+
+def _get_default_shared_insights_output():
+    """
+    Dictionary elements shared among all analyses.
+
+    In case of breaking changes to this dictionary output, the major version of this
+    dictionary should be updated. In case of a non-breaking change to this dictionary,
+    the minor version should be updated.
+ """ + return { + _CausalInsightsConstants.RawFeatureNameKey: [], + _CausalInsightsConstants.EngineeredNameKey: [], + _CausalInsightsConstants.CategoricalColumnKey: [], + _CausalInsightsConstants.TypeKey: [], + _CausalInsightsConstants.Version: '1.0', + _CausalInsightsConstants.CausalComputationTypeKey: "simple", + _CausalInsightsConstants.ConfoundingIntervalKey: None, + } + + +def _get_default_specific_insights(view): + # keys should be mutually exclusive with shared keys, so that the dictionaries can be cleanly merged + return { + _CausalInsightsConstants.PointEstimateKey: [], + _CausalInsightsConstants.StandardErrorKey: [], + _CausalInsightsConstants.ZStatKey: [], + _CausalInsightsConstants.ConfidenceIntervalLowerKey: [], + _CausalInsightsConstants.ConfidenceIntervalUpperKey: [], + _CausalInsightsConstants.PValueKey: [], + _CausalInsightsConstants.ViewKey: view + } + + +def _get_metadata_causal_insights_keys(): + return [_CausalInsightsConstants.Version, + _CausalInsightsConstants.CausalComputationTypeKey, + _CausalInsightsConstants.ConfoundingIntervalKey, + _CausalInsightsConstants.ViewKey] + + +def _first_stage_reg(X, y, *, automl=True): + if automl: + model = GridSearchCVList([make_pipeline(StandardScaler(), LassoCV()), + RandomForestRegressor( + n_estimators=100, random_state=123, min_samples_leaf=10), + lgb.LGBMRegressor(num_leaves=32)], + param_grid_list=[{}, + {'min_weight_fraction_leaf': + [.001, .01, .1]}, + {'learning_rate': [0.1, 0.3], 'max_depth': [3, 5]}], + cv=2, + scoring='neg_mean_squared_error') + best_est = model.fit(X, y).best_estimator_ + if isinstance(best_est, Pipeline): + return make_pipeline(StandardScaler(), Lasso(alpha=best_est.steps[1][1].alpha_)) + else: + return best_est + else: + model = make_pipeline(StandardScaler(), LassoCV(cv=5)).fit(X, y) + return make_pipeline(StandardScaler(), Lasso(alpha=model.steps[1][1].alpha_)) + + +def _first_stage_clf(X, y, *, make_regressor=False, automl=True): + if automl: + model = GridSearchCVList([make_pipeline(StandardScaler(), LogisticRegression()), + RandomForestClassifier( + n_estimators=100, random_state=123), + GradientBoostingClassifier(random_state=123)], + param_grid_list=[{'logisticregression__C': [0.01, .1, 1, 10, 100]}, + {'max_depth': [3, 5], + 'min_samples_leaf': [10, 50]}, + {'n_estimators': [50, 100], + 'max_depth': [3], + 'min_samples_leaf': [10, 30]}], + cv=5, + scoring='neg_log_loss') + est = model.fit(X, y).best_estimator_ + else: + model = make_pipeline(StandardScaler(), LogisticRegressionCV(cv=5, max_iter=1000)).fit(X, y) + est = make_pipeline(StandardScaler(), LogisticRegression(C=model.steps[1][1].C_[0])) + if make_regressor: + return _RegressionWrapper(est) + else: + return est + + +def _final_stage(): + return GridSearchCVList([WeightedLasso(), + RandomForestRegressor(n_estimators=100, random_state=123)], + param_grid_list=[{'alpha': [.001, .01, .1, 1, 10]}, + {'max_depth': [3, 5], + 'min_samples_leaf': [10, 50]}], + cv=5, + scoring='neg_mean_squared_error') + + +# simplification of sklearn's ColumnTransformer that encodes categoricals and passes through selected other columns +# but also supports get_feature_names with expected signature + + +class _ColumnTransformer(TransformerMixin): + def __init__(self, categorical, passthrough): + self.categorical = categorical + self.passthrough = passthrough + + def fit(self, X): + cat_cols = _safe_indexing(X, self.categorical, axis=1) + if cat_cols.shape[1] > 0: + self.has_cats = True + self.one_hot_encoder = OneHotEncoder( + drop='first', 
sparse=False).fit(cat_cols)
+        else:
+            self.has_cats = False
+        self.d_x = X.shape[1]
+        return self
+
+    def transform(self, X):
+        rest = _safe_indexing(X, self.passthrough, axis=1)
+        if self.has_cats:
+            cats = self.one_hot_encoder.transform(
+                _safe_indexing(X, self.categorical, axis=1))
+            return np.hstack((cats, rest))
+        else:
+            return rest
+
+    def get_feature_names(self, names=None):
+        if names is None:
+            names = [f"x{i}" for i in range(self.d_x)]
+        rest = _safe_indexing(names, self.passthrough, axis=0)
+        if self.has_cats:
+            cats = self.one_hot_encoder.get_feature_names(
+                _safe_indexing(names, self.categorical, axis=0))
+            return np.concatenate((cats, rest))
+        else:
+            return rest
+
+
+class CausalAnalysis:
+    """
+    Note: this class is experimental and the API may evolve over our next few releases.
+
+    Gets the causal importance of features.
+
+    Parameters
+    ----------
+    feature_inds: array-like of int, str, or bool
+        The features for which to estimate causal effects, expressed as either column indices,
+        column names, or boolean flags indicating which columns to pick
+    categorical: array-like of int, str, or bool
+        The features which are categorical in nature, expressed as either column indices,
+        column names, or boolean flags indicating which columns to pick
+    heterogeneity_inds: array-like of int, str, or bool, or list of array-like elements or None, default None
+        If a 1d array, then whenever a heterogeneous (local) treatment effect model is estimated,
+        only the features in this array will be used for heterogeneity. If a 2d array, then its
+        first dimension should be len(feature_inds), and whenever a local causal effect is estimated
+        for target feature feature_inds[i], only the features in heterogeneity_inds[i] will be used
+        for heterogeneity. If heterogeneity_inds[i]=None, then all features are used for heterogeneity
+        when estimating the local causal effect for feature_inds[i], and likewise if
+        heterogeneity_inds[i]=[] then no features will be used. If heterogeneity_inds=None, then all
+        features are used for heterogeneity for all features, and if heterogeneity_inds=[] then no
+        features will be.
+    feature_names: list of str, default None
+        The names for all of the features in the data. Not necessary if the input will be a dataframe.
+        If None and the input is a plain numpy array, generated feature names will be ['x0', 'x1', ...].
+    upper_bound_on_cat_expansion: int, default 5
+        The maximum number of categorical values allowed, because they are expanded via one-hot encoding.
+        If a feature has more than this many values, then a causal effect model is not fitted for that
+        target feature and a warning is raised; the remaining models are still fitted.
+    classification: bool, default False
+        Whether this is a classification (as opposed to regression) task.
+        TODO. Enable also multi-class classification (post-MVP)
+    nuisance_models: one of {'linear', 'automl'}, optional (default='linear')
+        What models to use for nuisance estimation (i.e. for estimating propensity models or models of how
+        controls predict the outcome). If 'linear', then LassoCV (for regression) and LogisticRegressionCV
+        (for classification) are used. If 'automl', then k-fold cross-validation and model selection are
+        performed among several models and the best is chosen.
+        TODO. Add other options, such as {'azure_automl', 'forests', 'boosting'} that will use particular
+        sub-cases of models or also integrate with azure autoML. (post-MVP)
+    heterogeneity_model: one of {'linear', 'forest'}, optional (default='linear')
+        What type of model to use for treatment effect heterogeneity. 'linear' means that a heterogeneity
+        model of the form theta(X) = inner_prod(X, gamma) will be used, while 'forest' means that a forest
+        model will be trained instead.
+        TODO. Add other options, such as {'automl'} for performing
+        model selection for the causal effect, or {'sparse_linear'} for using a debiased lasso. (post-MVP)
+    n_jobs: int, default -1
+        Degree of parallelism to use when training models via joblib.Parallel
+    """
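+
+    # A minimal usage sketch (the data and column indices here are hypothetical;
+    # see econml/tests/test_causal_analysis.py for complete, runnable examples):
+    #
+    #   ca = CausalAnalysis(feature_inds=[0, 1], categorical=[1])
+    #   ca.fit(X, y)
+    #   effects = ca.global_causal_effect()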
+
+    _result_data = namedtuple("_result", field_names=[
+        "feature_index", "feature_name", "feature_baseline", "feature_levels", "hinds",
+        "X_transformer", "W_transformer", "estimator", "global_inference"])
+
+    def __init__(self, feature_inds, categorical, heterogeneity_inds=None, feature_names=None, classification=False,
+                 upper_bound_on_cat_expansion=5, nuisance_models='linear', heterogeneity_model='linear', n_jobs=-1):
+        self.feature_inds = feature_inds
+        self.categorical = categorical
+        self.heterogeneity_inds = heterogeneity_inds
+        self.feature_names = feature_names
+        self.classification = classification
+        self.upper_bound_on_cat_expansion = upper_bound_on_cat_expansion
+        self.nuisance_models = nuisance_models
+        self.heterogeneity_model = heterogeneity_model
+        self.n_jobs = n_jobs
+
+    def fit(self, X, y, warm_start=False):
+        """
+        Fits global and local causal effect models for each feature in feature_inds on the data.
+
+        Parameters
+        ----------
+        X : array-like
+            Feature data
+        y : array-like of shape (n,) or (n,1)
+            Outcome. If classification=True, then y should take exactly two values, since only
+            binary classification is currently implemented; otherwise an error is raised.
+            TODO. enable multi-class classification for y (post-MVP)
+        warm_start : boolean, default False
+            If False, train models for each feature in `feature_inds`.
+            If True, train only models for features in `feature_inds` that had not already been trained by
+            the previous call to `fit`, and for which neither the corresponding heterogeneity_inds nor the
+            automl flag has changed. If heterogeneity_inds have changed, then the final stage model of these
+            features will be refit. If the automl flag has changed, then the whole model is refit, despite
+            the warm start flag.
+        """
+
+        # Validate inputs
+        assert self.nuisance_models in ['automl', 'linear'], (
+            "The only supported nuisance models are 'linear' and 'automl', "
+            f"but received {self.nuisance_models}")
+
+        assert self.heterogeneity_model in ['linear', 'forest'], (
+            "The only supported heterogeneity models are 'linear' and 'forest', "
+            f"but received {self.heterogeneity_model}")
+
+        assert np.ndim(X) == 2, f"X must be a 2-dimensional array, but had shape {np.shape(X)}"
+
+        # TODO: check compatibility of X and Y lengths
+
+        if warm_start:
+            if not hasattr(self, "_results"):
+                # no previous fit, cancel warm start
+                warm_start = False
+
+            elif self._d_x != X.shape[1]:
+                raise ValueError(
+                    f"Can't warm start: previous X had {self._d_x} columns, new X has {X.shape[1]} columns")
+
+        # TODO: implement check for upper bound on categoricals
+
+        # work with numeric feature indices, so that we can easily compare with categorical ones
+        train_inds = _get_column_indices(X, self.feature_inds)
+
+        heterogeneity_inds = self.heterogeneity_inds
+        if heterogeneity_inds is None:
+            heterogeneity_inds = [None for ind in train_inds]
+
+        # if heterogeneity_inds is 1D, repeat it
+        if heterogeneity_inds == [] or isinstance(heterogeneity_inds[0], (int, str, bool)):
+            heterogeneity_inds = [heterogeneity_inds for _ in train_inds]
+
+        # heterogeneity_inds should be a 2D list of the same length as train_inds
+        elif heterogeneity_inds is not None and len(heterogeneity_inds) != len(train_inds):
+            raise ValueError("Heterogeneity indices should have the same number of entries as feature "
+                             f"indices, but here there were {len(heterogeneity_inds)} heterogeneity "
+                             f"entries and {len(train_inds)} feature indices.")
+
+        # replace None elements of heterogeneity_inds and ensure indices are numeric
+        heterogeneity_inds = {ind: list(range(X.shape[1])) if hinds is None else _get_column_indices(X, hinds)
+                              for ind, hinds in zip(train_inds, heterogeneity_inds)}
+
+        if warm_start:
+            if self.nuisance_models != self.nuisance_models_:
+                warnings.warn("warm_start will be ignored since the nuisance models have changed "
+                              f"from {self.nuisance_models_} to {self.nuisance_models} since the previous call to fit")
+                warm_start = False
+
+            if self.heterogeneity_model != self.heterogeneity_model_:
+                warnings.warn("warm_start will be ignored since the heterogeneity model has changed "
+                              f"from {self.heterogeneity_model_} to {self.heterogeneity_model} "
+                              "since the previous call to fit")
+                warm_start = False
+
+        # TODO: bail out also if categorical columns, classification changed?
+
+        # TODO: should we also train a new model_y under any circumstances when warm_start is True?
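+        # When warm_start is in effect, only (re)train models for features that were
+        # not fit before or whose heterogeneity indices have changed; everything else
+        # is reused from self._cache below.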
+ if warm_start: + new_inds = [ind for ind in train_inds if (ind not in self._cache or + heterogeneity_inds[ind] != self._cache[ind][1].hinds)] + else: + new_inds = train_inds + + self._cache = {} # store mapping from feature to insights, results + + # train the Y model + + # perform model selection for the Y model using all X, not on a per-column basis + allX = ColumnTransformer([('encode', + OneHotEncoder( + drop='first', sparse=False), + self.categorical)], + remainder='passthrough').fit_transform(X) + + if self.classification: + self._model_y = _first_stage_clf( + allX, y, automl=self.nuisance_models == 'automl', make_regressor=True) + else: + self._model_y = _first_stage_reg(allX, y, automl=self.nuisance_models == 'automl') + + if self.classification: + # now that we've trained the classifier and wrapped it, ensure that y is transformed to + # work with the regression wrapper + + # we use column_or_1d to treat pd.Series and pd.DataFrame objects the same way as arrays + y = column_or_1d(y).reshape(-1, 1) + + # note that this needs to happen after wrapping to generalize to the multi-class case, + # since otherwise we'll have too many columns to be able to train a classifier + y = OneHotEncoder(drop='first', sparse=False).fit_transform(y) + + assert y.ndim == 1 or y.shape[1] == 1, ("Multiclass classification isn't supported" if self.classification + else "Only a single outcome is supported") + + self._vec_y = y.ndim == 1 + self._d_x = X.shape[1] + + # start with empty results and default shared insights + self._results = [] + self._shared = _get_default_shared_insights_output() + + # convert categorical indicators to numeric indices + categorical_inds = _get_column_indices(X, self.categorical) + + def process_feature(name, feat_ind): + discrete_treatment = feat_ind in categorical_inds + hinds = heterogeneity_inds[feat_ind] + WX_transformer = ColumnTransformer([('encode', OneHotEncoder(drop='first', sparse=False), + [ind for ind in categorical_inds + if ind != feat_ind]), + ('drop', 'drop', feat_ind)], + remainder='passthrough') + W_transformer = ColumnTransformer([('encode', OneHotEncoder(drop='first', sparse=False), + [ind for ind in categorical_inds + if ind != feat_ind and ind not in hinds]), + ('drop', 'drop', hinds), + ('drop_feat', 'drop', feat_ind)], + remainder='passthrough') + # Use _ColumnTransformer instead of ColumnTransformer so we can get feature names + X_transformer = _ColumnTransformer([ind for ind in categorical_inds + if ind != feat_ind and ind in hinds], + [ind for ind in hinds + if ind != feat_ind and ind not in categorical_inds]) + + # Controls are all other columns of X + WX = WX_transformer.fit_transform(X) + # can't use X[:, feat_ind] when X is a DataFrame + T = _safe_indexing(X, feat_ind, axis=1) + + W = W_transformer.fit_transform(X) + X_xf = X_transformer.fit_transform(X) + if W.shape[1] == 0: + # array checking routines don't accept 0-width arrays + W = None + + if X_xf.shape[1] == 0: + X_xf = None + + # perform model selection + model_t = (_first_stage_clf(WX, T, automl=self.nuisance_models == 'automl') + if discrete_treatment else _first_stage_reg(WX, T, automl=self.nuisance_models == 'automl')) + + h_model = self.heterogeneity_model + if X_xf is None: + warnings.warn(f"Using a linear model instead of a forest model for feature '{name}' " + "because forests don't support models with no heterogeneity indices") + h_model = 'linear' + + if h_model == 'linear': + est = LinearDML(model_y=self._model_y, + model_t=model_t, + discrete_treatment=discrete_treatment, 
+ fit_cate_intercept=True, + linear_first_stages=False, + random_state=123) + elif h_model == 'forest': + est = CausalForestDML(model_y=self._model_y, + model_t=model_t, + discrete_treatment=discrete_treatment, + n_estimators=4000, + random_state=123) + est.tune(y, T, X=X_xf, W=W) + est.fit(y, T, X=X_xf, W=W, cache_values=True) + + # Prefer ate__inference to const_marginal_ate_inference(X) because it is doubly-robust and not conservative + if h_model == 'forest' and discrete_treatment: + global_inference = est.ate__inference() + else: + # convert to NormalInferenceResults for consistency + inf = est.const_marginal_ate_inference(X=X_xf) + global_inference = NormalInferenceResults(d_t=inf.d_t, d_y=inf.d_y, + pred=inf.mean_point, + pred_stderr=inf.stderr_mean, + mean_pred_stderr=None, + inf_type='ate') + + # Set the dictionary values shared between local and global summaries + if discrete_treatment: + cats = est.transformer.categories_[0] + baseline = cats[est.transformer.drop_idx_[0]] + cats = cats[np.setdiff1d(np.arange(len(cats)), + est.transformer.drop_idx_[0])] + d_t = len(cats) + insights = { + _CausalInsightsConstants.TypeKey: ['cat'] * d_t, + _CausalInsightsConstants.RawFeatureNameKey: [name] * d_t, + _CausalInsightsConstants.CategoricalColumnKey: cats.tolist(), + _CausalInsightsConstants.EngineeredNameKey: [ + f"{name} (base={baseline}): {c}" for c in cats] + } + else: + d_t = 1 + cats = ["num"] + baseline = None + insights = { + _CausalInsightsConstants.TypeKey: ["num"], + _CausalInsightsConstants.RawFeatureNameKey: [name], + _CausalInsightsConstants.CategoricalColumnKey: [name], + _CausalInsightsConstants.EngineeredNameKey: [name] + } + result = CausalAnalysis._result_data(feature_index=feat_ind, + feature_name=name, + feature_baseline=baseline, + feature_levels=cats, + hinds=hinds, + X_transformer=X_transformer, + W_transformer=W_transformer, + estimator=est, + global_inference=global_inference) + + return insights, result + + if self.feature_names is None: + if hasattr(X, "iloc"): + feature_names = X.columns + else: + feature_names = [f"x{i}" for i in range(X.shape[1])] + else: + feature_names = self.feature_names + + self.feature_names_ = feature_names + + # extract subset of names matching new columns + new_feat_names = _safe_indexing(feature_names, new_inds) + + cache_updates = dict(zip(new_inds, + joblib.Parallel(n_jobs=self.n_jobs, + verbose=1)(joblib.delayed(process_feature)(feat_name, feat_ind) + for feat_name, feat_ind in zip(new_feat_names, new_inds)))) + + self._cache.update(cache_updates) + + for ind in train_inds: + dict_update, result = self._cache[ind] + self._results.append(result) + for k in dict_update: + self._shared[k] += dict_update[k] + + self.nuisance_models_ = self.nuisance_models + self.heterogeneity_model_ = self.heterogeneity_model + return self + + # properties to return from effect InferenceResults + @staticmethod + def _point_props(alpha): + return [(_CausalInsightsConstants.PointEstimateKey, 'point_estimate'), + (_CausalInsightsConstants.StandardErrorKey, 'stderr'), + (_CausalInsightsConstants.ZStatKey, 'zstat'), + (_CausalInsightsConstants.PValueKey, 'pvalue'), + (_CausalInsightsConstants.ConfidenceIntervalLowerKey, lambda inf: inf.conf_int(alpha=alpha)[0]), + (_CausalInsightsConstants.ConfidenceIntervalUpperKey, lambda inf: inf.conf_int(alpha=alpha)[1])] + + # properties to return from PopulationSummaryResults + @staticmethod + def _summary_props(alpha): + return [(_CausalInsightsConstants.PointEstimateKey, 'mean_point'), + 
(_CausalInsightsConstants.StandardErrorKey, 'stderr_mean'), + (_CausalInsightsConstants.ZStatKey, 'zstat'), + (_CausalInsightsConstants.PValueKey, 'pvalue'), + (_CausalInsightsConstants.ConfidenceIntervalLowerKey, lambda inf: inf.conf_int_mean(alpha=alpha)[0]), + (_CausalInsightsConstants.ConfidenceIntervalUpperKey, lambda inf: inf.conf_int_mean(alpha=alpha)[1])] + + # Converts strings to property lookups or method calls as a convenience so that the + # _point_props and _summary_props above can be applied to an inference object + @staticmethod + def _make_accessor(attr): + if isinstance(attr, str): + s = attr + + def attr(o): + val = getattr(o, s) + if callable(val): + return val() + else: + return val + return attr + + # Create a summary combining all results into a single output; this is used + # by the various causal_effect and causal_effect_dict methods to generate either a dataframe + # or a dictionary, respectively, based on the summary function passed into this method + def _summarize(self, *, summary, get_inference, props, expand_arr, drop_sample): + + assert hasattr(self, "_results"), "This object has not been fit, so cannot get results" + + # ensure array has shape (m,y,t) + def ensure_proper_dims(arr): + if expand_arr: + # population summary is missing sample dimension; add it for consistency + arr = np.expand_dims(arr, 0) + if self._vec_y: + # outcome dimension is missing; add it for consistency + arr = np.expand_dims(arr, axis=1) + assert 2 <= arr.ndim <= 3 + # add singleton treatment dimension if missing + return arr if arr.ndim == 3 else np.expand_dims(arr, axis=2) + + # each attr has dimension (m,y) or (m,y,t) + def coalesce(attr): + """Join together the arrays for each feature""" + attr = self._make_accessor(attr) + # concatenate along treatment dimension + arr = np.concatenate([ensure_proper_dims(attr(get_inference(res))) + for res in self._results], axis=2) + + # for dictionary representation, want to remove unneeded sample dimension + # in cohort and global results + if drop_sample: + arr = np.squeeze(arr, 0) + + return arr + + return summary([(key, coalesce(val)) for key, val in props]) + + def _pandas_summary(self, get_inference, props, n, + expand_arr=False): + """ + Summarizes results into a dataframe. 
+
+        Parameters
+        ----------
+        get_inference : lambda
+            Method to get the relevant inference results from each result object
+        props : list of (string, string or lambda)
+            Set of column names and ways to get the corresponding values from the inference object
+        n : int
+            The number of samples in the dataset
+        expand_arr : boolean, default False
+            Whether to add an initial sample dimension to the result arrays
+        """
+        def make_dataframe(props):
+
+            to_include = OrderedDict([(key, value.reshape(-1))
+                                      for key, value in props])
+
+            # TODO: enrich outcome logic for multi-class classification when that is supported
+            index = pd.MultiIndex.from_tuples([(i, outcome, res.feature_name, f"{lvl}v{res.feature_baseline}"
+                                                if res.feature_baseline is not None
+                                                else lvl)
+                                               for i in range(n)
+                                               for outcome in ["y0"]
+                                               for res in self._results
+                                               for lvl in res.feature_levels],
+                                              names=["sample", "outcome", "feature", "feature_value"])
+            for lvl in index.levels:
+                if len(lvl) == 1:
+                    if not isinstance(index, pd.MultiIndex):
+                        # can't drop only level
+                        index = pd.Index([self._results[0].feature_name], name="feature")
+                    else:
+                        index = index.droplevel(lvl.name)
+            return pd.DataFrame(to_include, index=index)
+
+        return self._summarize(summary=make_dataframe,
+                               get_inference=get_inference,
+                               props=props,
+                               expand_arr=expand_arr,
+                               drop_sample=False)  # dropping the sample dimension is handled above instead
+
+    def _dict_summary(self, get_inference, *, props, kind, drop_sample=False, expand_arr=False):
+        """
+        Summarizes results into a dictionary.
+
+        Parameters
+        ----------
+        get_inference : lambda
+            Method to get the relevant inference results from each result object
+        props : list of (string, string or lambda)
+            Set of column names and ways to get the corresponding values from the inference object
+        kind : string
+            The kind of inference results to get (e.g. 'global', 'local', or 'cohort')
+        drop_sample : boolean, default False
+            Whether to drop the sample dimension from each array
+        expand_arr : boolean, default False
+            Whether to add an initial sample dimension to the result arrays
+        """
+        def make_dict(props):
+            # should be serialization-ready and contain no numpy arrays
+            res = _get_default_specific_insights(kind)
+            res.update([(key, value.tolist()) for key, value in props])
+            return {**self._shared, **res}
+
+        return self._summarize(summary=make_dict,
+                               get_inference=get_inference,
+                               props=props,
+                               expand_arr=expand_arr,
+                               drop_sample=drop_sample)
+
+    def global_causal_effect(self, alpha=0.1):
+        """
+        Get the global causal effect for each feature as a pandas DataFrame.
+
+        Parameters
+        ----------
+        alpha : float, default 0.1
+            The confidence level of the confidence interval
+
+        Returns
+        -------
+        global_effects : pandas DataFrame
+            DataFrame with the following structure:
+
+            :Columns: ['point', 'stderr', 'zstat', 'pvalue', 'ci_lower', 'ci_upper']
+            :Index: ['feature', 'feature_value']
+            :Rows: For each feature that is numerical, we have an entry with index ['{feature_name}', 'num'], where
+                   'num' is literally the string 'num' and feature_name is the input feature name.
+                   For each feature that is categorical, we have an entry with index ['{feature_name}',
+                   '{cat}v{base}'] where cat is the category value and base is the category used as baseline.
+                   If all features are numerical then the feature_value index is dropped in the dataframe, but not
+                   in the serialized dict.
+        """
+        # a global inference indicates the effect of that one feature on the outcome
+        return self._pandas_summary(lambda res: res.global_inference, props=self._point_props(alpha),
+                                    n=1, expand_arr=True)
+
+    def _global_causal_effect_dict(self, alpha=0.1):
+        """
+        Gets the global causal effect for each feature as a dictionary.
+
+        Dictionary entries for predictions, etc. will be nested lists of shape (d_y, sum(d_t))
+
+        Only for serialization purposes to upload to AzureML
+        """
+        return self._dict_summary(lambda res: res.global_inference, props=self._point_props(alpha),
+                                  kind='global', drop_sample=True, expand_arr=True)
+
+    def _cohort_effect_inference(self, Xtest):
+        assert np.ndim(Xtest) == 2 and np.shape(Xtest)[1] == self._d_x, (
+            "Shape of Xtest must be compatible with shape of X, "
+            f"but got shape {np.shape(Xtest)} instead of (n, {self._d_x})"
+        )
+
+        def inference_from_result(result):
+            est = result.estimator
+            X = result.X_transformer.transform(Xtest)
+            if X.shape[1] == 0:
+                X = None
+            return est.const_marginal_ate_inference(X=X)
+        return inference_from_result
+
+    def cohort_causal_effect(self, Xtest, alpha=0.1):
+        """
+        Gets the average causal effects for a particular cohort defined by a population of X's.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            The cohort samples for which to return the average causal effects within cohort
+        alpha : float, default 0.1
+            The confidence level of the confidence interval
+
+        Returns
+        -------
+        cohort_effects : pandas DataFrame
+            DataFrame with the following structure:
+
+            :Columns: ['point', 'stderr', 'zstat', 'pvalue', 'ci_lower', 'ci_upper']
+            :Index: ['feature', 'feature_value']
+            :Rows: For each feature that is numerical, we have an entry with index ['{feature_name}', 'num'], where
+                   'num' is literally the string 'num' and feature_name is the input feature name.
+                   For each feature that is categorical, we have an entry with index ['{feature_name}', '{cat}v{base}']
+                   where cat is the category value and base is the category used as baseline.
+                   If all features are numerical then the feature_value index is dropped in the dataframe, but not
+                   in the serialized dict.
+        """
+        return self._pandas_summary(self._cohort_effect_inference(Xtest),
+                                    props=self._summary_props(alpha), n=1,
+                                    expand_arr=True)
+
+    def _cohort_causal_effect_dict(self, Xtest, alpha=0.1):
+        """
+        Gets the cohort causal effects for each feature as a dictionary.
+
+        Dictionary entries for predictions, etc. will be nested lists of shape (d_y, sum(d_t))
+
+        Only for serialization purposes to upload to AzureML
+        """
+        return self._dict_summary(self._cohort_effect_inference(Xtest), props=self._summary_props(alpha),
+                                  kind='cohort', expand_arr=True, drop_sample=True)
+
+    def _local_effect_inference(self, Xtest):
+        assert np.ndim(Xtest) == 2 and np.shape(Xtest)[1] == self._d_x, (
+            "Shape of Xtest must be compatible with shape of X, "
+            f"but got shape {np.shape(Xtest)} instead of (n, {self._d_x})"
+        )
+
+        def inference_from_result(result):
+            est = result.estimator
+            X = result.X_transformer.transform(Xtest)
+            if X.shape[1] == 0:
+                X = None
+            eff = est.const_marginal_effect_inference(X=X)
+            if X is None:
+                # need to reshape the output to match the input
+                eff = eff._expand_outputs(Xtest.shape[0])
+            return eff
+        return inference_from_result
+
+    def local_causal_effect(self, Xtest, alpha=0.1):
+        """
+        Gets the local causal effect for each feature as a pandas DataFrame.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            The samples for which to return the causal effects
+        alpha : float, default 0.1
+            The confidence level of the confidence interval
+
+        Returns
+        -------
+        local_effects : pandas DataFrame
+            DataFrame with the following structure:
+
+            :Columns: ['point', 'stderr', 'zstat', 'pvalue', 'ci_lower', 'ci_upper']
+            :Index: ['sample', 'feature', 'feature_value']
+            :Rows: For each feature that is numeric, we have an entry with index
+                   ['{sampleid}', '{feature_name}', 'num'],
+                   where 'num' is literally the string 'num', feature_name is the input feature name, and
+                   sampleid is the index of the sample in Xtest.
+                   For each feature that is categorical, we have an entry with index
+                   ['{sampleid}', '{feature_name}', '{cat}v{base}']
+                   where cat is the category value and base is the category used as baseline.
+                   If all features are numerical then the feature_value index is dropped in the dataframe, but not
+                   in the serialized dict.
+        """
+        return self._pandas_summary(self._local_effect_inference(Xtest),
+                                    props=self._point_props(alpha), n=Xtest.shape[0])
+
+    def _local_causal_effect_dict(self, Xtest, alpha=0.1):
+        """
+        Gets the local causal effect for each feature as a dictionary.
+
+        Dictionary entries for predictions, etc. will be nested lists of shape (n_rows, d_y, sum(d_t))
+
+        Only for serialization purposes to upload to AzureML
+        """
+        return self._dict_summary(self._local_effect_inference(Xtest), props=self._point_props(alpha),
+                                  kind='local')
+
+    def _safe_result_index(self, X, feature_index):
+        assert hasattr(self, "_results"), "This instance has not yet been fitted"
+
+        assert np.ndim(X) == 2 and np.shape(X)[1] == self._d_x, (
+            "Shape of X must be compatible with shape of the fitted X, "
+            f"but got shape {np.shape(X)} instead of (n, {self._d_x})"
+        )
+
+        (numeric_index,) = _get_column_indices(X, [feature_index])
+        results = [res for res in self._results
+                   if res.feature_index == numeric_index]
+
+        assert len(results) != 0, f"The feature index supplied ({feature_index}) was not fitted"
+        (result,) = results
+        return result
+
+    def whatif(self, X, Xnew, feature_index, y):
+        """
+        Get counterfactual predictions when feature_index is changed to Xnew from its observational counterpart.
+
+        Note that this only applies to regression use cases; for classification what-if analysis is not supported.
+ + Parameters + ---------- + X: array-like + Features + Xnew: array-like + New values of a single column of X + feature_index: int or string + The index of the feature being varied to Xnew, either as a numeric index or + the string name if the input is a dataframe + y: array-like + Observed labels or outcome of a predictive model for baseline y values + + Returns + ------- + y_new: InferenceResults + The predicted outputs that would have been observed under the counterfactual features + """ + + assert not self.classification, "What-if analysis cannot be applied to classification tasks" + + assert np.shape(X)[0] == np.shape(Xnew)[0] == np.shape(y)[0], ( + "X, Xnew, and y must have the same length, but have shapes " + f"{np.shape(X)}, {np.shape(Xnew)}, and {np.shape(y)}" + ) + + assert np.size(feature_index) == 1, f"Only one feature index may be changed, but got {np.size(feature_index)}" + + T0 = _safe_indexing(X, feature_index, axis=1) + T1 = Xnew + result = self._safe_result_index(X, feature_index) + X = result.X_transformer.transform(X) + if X.shape[1] == 0: + X = None + inf = result.estimator.effect_inference(X=X, T0=T0, T1=T1) + + # we want to offset the inference object by the baseline estimate of y + inf.translate(y) + + return inf + + def _whatif_dict(self, X, Xnew, feature_index, y, alpha=0.1): + """ + Get counterfactual predictions when feature_index is changed to Xnew from its observational counterpart. + + Note that this only applies to regression use cases; for classification what-if analysis is not supported. + + Parameters + ---------- + X: array-like + Features + Xnew: array-like + New values of a single column of X + feature_index: int or string + The index of the feature being varied to Xnew, either as a numeric index or + the string name if the input is a dataframe + y: array-like + Observed labels or outcome of a predictive model for baseline y values + alpha: float, default 0.1 + The confidence level used for confidence intervals in the output + + Returns + ------- + dict : dict + The counterfactual predictions, as a dictionary + """ + + inf = self.whatif(X, Xnew, feature_index, y) + props = self._point_props(alpha=alpha) + res = _get_default_specific_insights('whatif') + res.update([(key, self._make_accessor(attr)(inf).tolist()) for key, attr in props]) + return res + + def _tree(self, is_policy, Xtest, feature_index, *, treatment_cost=0, + max_depth=3, min_samples_leaf=2, min_impurity_decrease=1e-4, alpha=.1): + + result = self._safe_result_index(Xtest, feature_index) + Xtest = result.X_transformer.transform(Xtest) + if Xtest.shape[1] == 0: + Xtest = None + if result.feature_baseline is None: + treatment_names = ['low', 'high'] + else: + treatment_names = [f"{result.feature_baseline}"] + \ + [f"{lvl}" for lvl in result.feature_levels] + + if len(treatment_names) > 2 and is_policy: + raise AssertionError("Can't create policy trees for multi-class features, " + f"but this feature has values {treatment_names}") + + TreeType = SingleTreePolicyInterpreter if is_policy else SingleTreeCateInterpreter + intrp = TreeType(include_model_uncertainty=True, + uncertainty_level=alpha, + max_depth=max_depth, + min_samples_leaf=min_samples_leaf, + min_impurity_decrease=min_impurity_decrease) + + if is_policy: + intrp.interpret(result.estimator, Xtest, + sample_treatment_costs=treatment_cost) + else: # no treatment cost for CATE trees + intrp.interpret(result.estimator, Xtest) + + return intrp, result.X_transformer.get_feature_names(self.feature_names_), treatment_names + + # TODO: it 
seems like it would be better to just return the tree itself rather than plot it;
+    #       however, the tree can't store the feature and treatment names we compute here...
+    def plot_policy_tree(self, Xtest, feature_index, *, treatment_cost=0,
+                         max_depth=3, min_samples_leaf=2, min_value_increase=1e-4, alpha=.1):
+        """
+        Plot a recommended policy tree using matplotlib.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            Features
+        feature_index
+            Index of the feature to be considered as treatment
+        treatment_cost : int, or array-like of same length as number of rows of Xtest, optional (default=0)
+            Cost of treatment, or cost of treatment for each sample
+        max_depth : int, optional (default=3)
+            maximum depth of the tree
+        min_samples_leaf : int, optional (default=2)
+            minimum number of samples on each leaf
+        min_value_increase : float, optional (default=1e-4)
+            The minimum increase in the policy value that a split needs to create in order to be constructed
+        alpha : float in [0, 1], optional (default=.1)
+            Confidence level of the confidence intervals displayed in the leaf nodes.
+            A (1-alpha)*100% confidence interval is displayed.
+        """
+        intrp, feature_names, treatment_names = self._tree(True, Xtest, feature_index,
+                                                           treatment_cost=treatment_cost,
+                                                           max_depth=max_depth,
+                                                           min_samples_leaf=min_samples_leaf,
+                                                           min_impurity_decrease=min_value_increase,
+                                                           alpha=alpha)
+        return intrp.plot(feature_names=feature_names, treatment_names=treatment_names)
+
+    def _policy_tree_string(self, Xtest, feature_index, *, treatment_cost=0,
+                            max_depth=3, min_samples_leaf=2, min_value_increase=1e-4, alpha=.1):
+        """
+        Get a recommended policy tree in graphviz format as a string.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            Features
+        feature_index
+            Index of the feature to be considered as treatment
+        treatment_cost : int, or array-like of same length as number of rows of Xtest, optional (default=0)
+            Cost of treatment, or cost of treatment for each sample
+        max_depth : int, optional (default=3)
+            maximum depth of the tree
+        min_samples_leaf : int, optional (default=2)
+            minimum number of samples on each leaf
+        min_value_increase : float, optional (default=1e-4)
+            The minimum increase in the policy value that a split needs to create in order to be constructed
+        alpha : float in [0, 1], optional (default=.1)
+            Confidence level of the confidence intervals displayed in the leaf nodes.
+            A (1-alpha)*100% confidence interval is displayed.
+
+        Returns
+        -------
+        tree : string
+            The policy tree represented as a graphviz string
+        """
+
+        intrp, feature_names, treatment_names = self._tree(True, Xtest, feature_index,
+                                                           treatment_cost=treatment_cost,
+                                                           max_depth=max_depth,
+                                                           min_samples_leaf=min_samples_leaf,
+                                                           min_impurity_decrease=min_value_increase,
+                                                           alpha=alpha)
+        return intrp.export_graphviz(feature_names=feature_names,
+                                     treatment_names=treatment_names)
+
+    # TODO: it seems like it would be better to just return the tree itself rather than plot it;
+    #       however, the tree can't store the feature and treatment names we compute here...
+    def plot_heterogeneity_tree(self, Xtest, feature_index, *,
+                                max_depth=3, min_samples_leaf=2, min_impurity_decrease=1e-4,
+                                alpha=.1):
+        """
+        Plot an effect heterogeneity tree using matplotlib.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            Features
+        feature_index
+            Index of the feature to be considered as treatment
+        max_depth : int, optional (default=3)
+            maximum depth of the tree
+        min_samples_leaf : int, optional (default=2)
+            minimum number of samples on each leaf
+        min_impurity_decrease : float, optional (default=1e-4)
+            The minimum decrease in the impurity/uniformity of the causal effect that a split needs to
+            achieve in order to be constructed
+        alpha : float in [0, 1], optional (default=.1)
+            Confidence level of the confidence intervals displayed in the leaf nodes.
+            A (1-alpha)*100% confidence interval is displayed.
+        """
+
+        intrp, feature_names, treatment_names = self._tree(False, Xtest, feature_index,
+                                                           max_depth=max_depth,
+                                                           min_samples_leaf=min_samples_leaf,
+                                                           min_impurity_decrease=min_impurity_decrease,
+                                                           alpha=alpha)
+        return intrp.plot(feature_names=feature_names,
+                          treatment_names=treatment_names)
+
+    def _heterogeneity_tree_string(self, Xtest, feature_index, *,
+                                   max_depth=3, min_samples_leaf=2, min_impurity_decrease=1e-4,
+                                   alpha=.1):
+        """
+        Get an effect heterogeneity tree in graphviz format as a string.
+
+        Parameters
+        ----------
+        Xtest : array-like
+            Features
+        feature_index
+            Index of the feature to be considered as treatment
+        max_depth : int, optional (default=3)
+            maximum depth of the tree
+        min_samples_leaf : int, optional (default=2)
+            minimum number of samples on each leaf
+        min_impurity_decrease : float, optional (default=1e-4)
+            The minimum decrease in the impurity/uniformity of the causal effect that a split needs to
+            achieve in order to be constructed
+        alpha : float in [0, 1], optional (default=.1)
+            Confidence level of the confidence intervals displayed in the leaf nodes.
+            A (1-alpha)*100% confidence interval is displayed.
+        """
+
+        intrp, feature_names, treatment_names = self._tree(False, Xtest, feature_index,
+                                                           max_depth=max_depth,
+                                                           min_samples_leaf=min_samples_leaf,
+                                                           min_impurity_decrease=min_impurity_decrease,
+                                                           alpha=alpha)
+        return intrp.export_graphviz(feature_names=feature_names,
+                                     treatment_names=treatment_names)
diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
new file mode 100644
index 000000000..bd1fc5096
--- /dev/null
+++ b/econml/tests/test_causal_analysis.py
@@ -0,0 +1,426 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
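+
+"""Tests for the CausalAnalysis solution (econml.solutions.causal_analysis)."""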
+
+import unittest
+import numpy as np
+import pandas as pd
+from contextlib import ExitStack
+from econml.solutions.causal_analysis import CausalAnalysis
+from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
+
+
+class TestCausalAnalysis(unittest.TestCase):
+
+    def test_basic_array(self):
+        for d_y in [(), (1,)]:
+            for classification in [False, True]:
+                y = np.random.choice([0, 1], size=(500,) + d_y)
+                X = np.hstack((np.random.normal(size=(500, 2)),
+                               np.random.choice([0, 1], size=(500, 1)),
+                               np.random.choice([0, 1, 2], size=(500, 1))))
+                inds = [0, 1, 2, 3]
+                cats = [2, 3]
+                hinds = [0, 3]
+                ca = CausalAnalysis(inds, cats, hinds, classification=classification)
+                ca.fit(X, y)
+                glo = ca.global_causal_effect()
+                coh = ca.cohort_causal_effect(X[:2])
+                loc = ca.local_causal_effect(X[:2])
+
+                # global and cohort data should have exactly the same structure, but different values
+                assert glo.index.equals(coh.index)
+
+                # local index should have as many entries as global times the number of rows passed in
+                assert len(loc.index) == 2 * len(glo.index)
+
+                assert glo.index.names == ['feature', 'feature_value']
+                assert loc.index.names == ['sample'] + glo.index.names
+
+                glo_dict = ca._global_causal_effect_dict()
+                coh_dict = ca._cohort_causal_effect_dict(X[:2])
+                loc_dict = ca._local_causal_effect_dict(X[:2])
+
+                glo_point_est = np.array(glo_dict[_CausalInsightsConstants.PointEstimateKey])
+                coh_point_est = np.array(coh_dict[_CausalInsightsConstants.PointEstimateKey])
+                loc_point_est = np.array(loc_dict[_CausalInsightsConstants.PointEstimateKey])
+
+                ca._policy_tree_string(X, 1)
+                ca._heterogeneity_tree_string(X, 1)
+                ca._heterogeneity_tree_string(X, 3)
+
+                # Can't handle multi-dimensional treatments
+                with self.assertRaises(AssertionError):
+                    ca._policy_tree_string(X, 3)
+
+                # global shape is (d_y, sum(d_t))
+                assert glo_point_est.shape == coh_point_est.shape == (1, 5)
+                assert loc_point_est.shape == (2,) + glo_point_est.shape
+                if not classification:
+                    # ExitStack can be used as a "do nothing" ContextManager
+                    cm = ExitStack()
+                else:
+                    cm = self.assertRaises(Exception)
+                with cm:
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), 1, y[:2])
+                    assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                    inf.summary_frame()
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), 2, y[:2])
+                    assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                    inf.summary_frame()
+
+                    ca._whatif_dict(X[:2], np.ones(shape=(2,)), 1, y[:2])
+
+                # features; for categoricals they should appear #cats-1 times each
+                fts = ['x0', 'x1', 'x2', 'x3', 'x3']
+
+                for i in range(len(fts)):
+                    assert fts[i] == glo.index[i][0] == loc.index[i][1] == loc.index[len(fts) + i][1]
+
+                badargs = [
+                    (inds, cats, [4]),  # hinds out of range
+                    (inds, cats, ["test"])  # hinds not a valid column
+                ]
+
+                for args in badargs:
+                    with self.assertRaises(Exception):
+                        ca = CausalAnalysis(*args)
+                        ca.fit(X, y)
+
+    def test_basic_pandas(self):
+        for classification in [False, True]:
+            y = pd.Series(np.random.choice([0, 1], size=(500,)))
+            X = pd.DataFrame({'a': np.random.normal(size=500),
+                              'b': np.random.normal(size=500),
+                              'c': np.random.choice([0, 1], size=500),
+                              'd': np.random.choice(['a', 'b', 'c'], size=500)})
+            n_inds = [0, 1, 2, 3]
+            t_inds = ['a', 'b', 'c', 'd']
+            n_cats = [2, 3]
+            t_cats = ['c', 'd']
+            n_hinds = [0, 3]
+            t_hinds = ['a', 'd']
+            for (inds, cats, hinds) in [(n_inds, n_cats, n_hinds), (t_inds, t_cats, t_hinds)]:
+                ca = CausalAnalysis(inds, cats, hinds, classification=classification)
+                ca.fit(X, y)
+                glo = 
ca.global_causal_effect()
+                coh = ca.cohort_causal_effect(X[:2])
+                loc = ca.local_causal_effect(X[:2])
+
+                # global and cohort data should have exactly the same structure, but different values
+                assert glo.index.equals(coh.index)
+
+                # local index should have as many entries as global times the number of rows passed in
+                assert len(loc.index) == 2 * len(glo.index)
+
+                assert glo.index.names == ['feature', 'feature_value']
+                assert loc.index.names == ['sample'] + glo.index.names
+
+                # features; for categoricals they should appear #cats-1 times each
+                fts = ['a', 'b', 'c', 'd', 'd']
+
+                for i in range(len(fts)):
+                    assert fts[i] == glo.index[i][0] == loc.index[i][1] == loc.index[len(fts) + i][1]
+
+                glo_dict = ca._global_causal_effect_dict()
+                coh_dict = ca._cohort_causal_effect_dict(X[:2])
+                loc_dict = ca._local_causal_effect_dict(X[:2])
+
+                glo_point_est = np.array(glo_dict[_CausalInsightsConstants.PointEstimateKey])
+                coh_point_est = np.array(coh_dict[_CausalInsightsConstants.PointEstimateKey])
+                loc_point_est = np.array(loc_dict[_CausalInsightsConstants.PointEstimateKey])
+
+                # global shape is (d_y, sum(d_t))
+                assert glo_point_est.shape == coh_point_est.shape == (1, 5)
+                assert loc_point_est.shape == (2,) + glo_point_est.shape
+
+                ca._policy_tree_string(X, inds[1])
+                ca._heterogeneity_tree_string(X, inds[1])
+                ca._heterogeneity_tree_string(X, inds[3])
+
+                # Can't handle multi-dimensional treatments
+                with self.assertRaises(AssertionError):
+                    ca._policy_tree_string(X, inds[3])
+
+                if not classification:
+                    # ExitStack can be used as a "do nothing" ContextManager
+                    cm = ExitStack()
+                else:
+                    cm = self.assertRaises(Exception)
+                with cm:
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), inds[1], y[:2])
+                    assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                    inf.summary_frame()
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), inds[2], y[:2])
+                    assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                    inf.summary_frame()
+
+                    ca._whatif_dict(X[:2], np.ones(shape=(2,)), inds[1], y[:2])
+
+            badargs = [
+                (n_inds, n_cats, [4]),  # hinds out of range
+                (n_inds, n_cats, ["test"])  # hinds not a valid column
+            ]
+
+            for args in badargs:
+                with self.assertRaises(Exception):
+                    ca = CausalAnalysis(*args)
+                    ca.fit(X, y)
+
+    def test_automl_first_stage(self):
+        d_y = (1,)
+        for classification in [False, True]:
+            y = np.random.choice([0, 1], size=(500,) + d_y)
+            X = np.hstack((np.random.normal(size=(500, 2)),
+                           np.random.choice([0, 1], size=(500, 1)),
+                           np.random.choice([0, 1, 2], size=(500, 1))))
+            inds = [0, 1, 2, 3]
+            cats = [2, 3]
+            hinds = [0, 3]
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification, nuisance_models='automl')
+            ca.fit(X, y)
+            glo = ca.global_causal_effect()
+            coh = ca.cohort_causal_effect(X[:2])
+            loc = ca.local_causal_effect(X[:2])
+
+            # global and cohort data should have exactly the same structure, but different values
+            assert glo.index.equals(coh.index)
+
+            # local index should have as many entries as global times the number of rows passed in
+            assert len(loc.index) == 2 * len(glo.index)
+
+            assert glo.index.names == ['feature', 'feature_value']
+            assert loc.index.names == ['sample'] + glo.index.names
+
+            glo_dict = ca._global_causal_effect_dict()
+            coh_dict = ca._cohort_causal_effect_dict(X[:2])
+            loc_dict = ca._local_causal_effect_dict(X[:2])
+
+            glo_point_est = np.array(glo_dict[_CausalInsightsConstants.PointEstimateKey])
+            coh_point_est = np.array(coh_dict[_CausalInsightsConstants.PointEstimateKey])
+            loc_point_est = 
np.array(loc_dict[_CausalInsightsConstants.PointEstimateKey])
+
+            ca._policy_tree_string(X, 1)
+            ca._heterogeneity_tree_string(X, 1)
+            ca._heterogeneity_tree_string(X, 3)
+
+            # Can't handle multi-dimensional treatments
+            with self.assertRaises(AssertionError):
+                ca._policy_tree_string(X, 3)
+
+            # global shape is (d_y, sum(d_t))
+            assert glo_point_est.shape == coh_point_est.shape == (1, 5)
+            assert loc_point_est.shape == (2,) + glo_point_est.shape
+            if not classification:
+                # ExitStack can be used as a "do nothing" ContextManager
+                cm = ExitStack()
+            else:
+                cm = self.assertRaises(Exception)
+            with cm:
+                inf = ca.whatif(X[:2], np.ones(shape=(2,)), 1, y[:2])
+                assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                inf.summary_frame()
+                inf = ca.whatif(X[:2], np.ones(shape=(2,)), 2, y[:2])
+                assert np.shape(inf.point_estimate) == np.shape(y[:2])
+                inf.summary_frame()
+
+                ca._whatif_dict(X[:2], np.ones(shape=(2,)), 1, y[:2])
+
+            # features; for categoricals they should appear #cats-1 times each
+            fts = ['x0', 'x1', 'x2', 'x3', 'x3']
+
+            for i in range(len(fts)):
+                assert fts[i] == glo.index[i][0] == loc.index[i][1] == loc.index[len(fts) + i][1]
+
+            badargs = [
+                (inds, cats, [4]),  # hinds out of range
+                (inds, cats, ["test"])  # hinds not a valid column
+            ]
+
+            for args in badargs:
+                with self.assertRaises(Exception):
+                    ca = CausalAnalysis(*args)
+                    ca.fit(X, y)
+
+    def test_one_feature(self):
+        # make sure we don't run into problems dropping every index
+        y = pd.Series(np.random.choice([0, 1], size=(500,)))
+        X = pd.DataFrame({'a': np.random.normal(size=500),
+                          'b': np.random.normal(size=500),
+                          'c': np.random.choice([0, 1], size=500),
+                          'd': np.random.choice(['a', 'b', 'c'], size=500)})
+        inds = ['a']
+        cats = ['c', 'd']
+        hinds = ['a', 'd']
+
+        ca = CausalAnalysis(inds, cats, hinds, classification=False)
+        ca.fit(X, y)
+        glo = ca.global_causal_effect()
+        coh = ca.cohort_causal_effect(X[:2])
+        loc = ca.local_causal_effect(X[:2])
+
+        # global and cohort data should have exactly the same structure, but different values
+        assert glo.index.equals(coh.index)
+
+        # local index should have as many entries as global times the number of rows passed in
+        assert len(loc.index) == 2 * len(glo.index)
+
+        assert glo.index.names == ['feature']
+        assert loc.index.names == ['sample']
+
+        glo_dict = ca._global_causal_effect_dict()
+        coh_dict = ca._cohort_causal_effect_dict(X[:2])
+        loc_dict = ca._local_causal_effect_dict(X[:2])
+
+        glo_point_est = np.array(glo_dict[_CausalInsightsConstants.PointEstimateKey])
+        coh_point_est = np.array(coh_dict[_CausalInsightsConstants.PointEstimateKey])
+        loc_point_est = np.array(loc_dict[_CausalInsightsConstants.PointEstimateKey])
+
+        # global shape is (d_y, sum(d_t))
+        assert glo_point_est.shape == coh_point_est.shape == (1, 1)
+        assert loc_point_est.shape == (2,) + glo_point_est.shape
+
+        ca._policy_tree_string(X, inds[0])
+        ca._heterogeneity_tree_string(X, inds[0])
+
+    def test_final_models(self):
+        d_y = (1,)
+        y = np.random.choice([0, 1], size=(500,) + d_y)
+        X = np.hstack((np.random.normal(size=(500, 2)),
+                       np.random.choice([0, 1], size=(500, 1)),
+                       np.random.choice([0, 1, 2], size=(500, 1))))
+        inds = [0, 1, 2, 3]
+        cats = [2, 3]
+        hinds = [0, 3]
+        for h_model in ['forest', 'linear']:
+            for classification in [False, True]:
+                ca = CausalAnalysis(inds, cats, hinds, classification=classification, heterogeneity_model=h_model)
+                ca.fit(X, y)
+                glo = ca.global_causal_effect()
+                coh = ca.cohort_causal_effect(X[:2])
+                loc = ca.local_causal_effect(X[:2])
+                glo_dict = 
ca._global_causal_effect_dict()
+                coh_dict = ca._cohort_causal_effect_dict(X[:2])
+                loc_dict = ca._local_causal_effect_dict(X[:2])
+
+                ca._policy_tree_string(X, 1)
+                ca._heterogeneity_tree_string(X, 1)
+                ca._heterogeneity_tree_string(X, 3)
+
+                # Can't handle multi-dimensional treatments
+                with self.assertRaises(AssertionError):
+                    ca._policy_tree_string(X, 3)
+
+                if not classification:
+                    # ExitStack can be used as a "do nothing" ContextManager
+                    cm = ExitStack()
+                else:
+                    cm = self.assertRaises(Exception)
+                with cm:
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), 1, y[:2])
+                    inf.summary_frame()
+                    inf = ca.whatif(X[:2], np.ones(shape=(2,)), 2, y[:2])
+                    inf.summary_frame()
+
+                    ca._whatif_dict(X[:2], np.ones(shape=(2,)), 1, y[:2])
+
+        with self.assertRaises(AssertionError):
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification, heterogeneity_model='other')
+            ca.fit(X, y)
+
+    def test_forest_with_pandas(self):
+        y = pd.Series(np.random.choice([0, 1], size=(500,)))
+        X = pd.DataFrame({'a': np.random.normal(size=500),
+                          'b': np.random.normal(size=500),
+                          'c': np.random.choice([0, 1], size=500),
+                          'd': np.random.choice(['a', 'b', 'c'], size=500)})
+        inds = ['a', 'b', 'c', 'd']
+        cats = ['c', 'd']
+        hinds = ['a', 'd']
+
+        ca = CausalAnalysis(inds, cats, hinds, heterogeneity_model='forest')
+        ca.fit(X, y)
+        glo = ca.global_causal_effect()
+        coh = ca.cohort_causal_effect(X[:2])
+        loc = ca.local_causal_effect(X[:2])
+
+        # global and cohort data should have exactly the same structure, but different values
+        assert glo.index.equals(coh.index)
+
+        # local index should have as many entries as global times the number of rows passed in
+        assert len(loc.index) == 2 * len(glo.index)
+
+        assert glo.index.names == ['feature', 'feature_value']
+        assert loc.index.names == ['sample'] + glo.index.names
+
+        # features; for categoricals they should appear #cats-1 times each
+        fts = ['a', 'b', 'c', 'd', 'd']
+
+        for i in range(len(fts)):
+            assert fts[i] == glo.index[i][0] == loc.index[i][1] == loc.index[len(fts) + i][1]
+
+        glo_dict = ca._global_causal_effect_dict()
+        coh_dict = ca._cohort_causal_effect_dict(X[:2])
+        loc_dict = ca._local_causal_effect_dict(X[:2])
+
+        glo_point_est = np.array(glo_dict[_CausalInsightsConstants.PointEstimateKey])
+        coh_point_est = np.array(coh_dict[_CausalInsightsConstants.PointEstimateKey])
+        loc_point_est = np.array(loc_dict[_CausalInsightsConstants.PointEstimateKey])
+
+        # global shape is (d_y, sum(d_t))
+        assert glo_point_est.shape == coh_point_est.shape == (1, 5)
+        assert loc_point_est.shape == (2,) + glo_point_est.shape
+
+        ca._policy_tree_string(X, inds[1])
+        ca._heterogeneity_tree_string(X, inds[1])
+        ca._heterogeneity_tree_string(X, inds[3])
+
+        # Can't handle multi-dimensional treatments
+        with self.assertRaises(AssertionError):
+            ca._policy_tree_string(X, inds[3])
+
+    def test_warm_start(self):
+        for classification in [True, False]:
+            # dgp
+            X1 = np.random.normal(0, 1, size=(500, 5))
+            X2 = np.random.choice([0, 1], size=(500, 1))
+            X3 = np.random.choice([0, 1, 2], size=(500, 1))
+            X = np.hstack((X1, X2, X3))
+            X_df = pd.DataFrame(X, columns=[f"x{i}" for i in range(7)])
+            y = np.random.choice([0, 1], size=(500,))
+            y_df = pd.Series(y)
+            # model
+            hetero_inds = [0, 1, 2]
+            feat_inds = [1, 3, 5]
+            categorical = [5, 6]
+            ca = CausalAnalysis(feat_inds, categorical, heterogeneity_inds=hetero_inds,
+                                classification=classification,
+                                nuisance_models='linear', heterogeneity_model="linear", n_jobs=-1)
+            ca.fit(X_df, y)
+            eff = ca.global_causal_effect(alpha=0.05)
+            eff = 
ca.local_causal_effect(X_df, alpha=0.05)
+
+            ca.feature_inds = [1, 2, 3, 5]
+            ca.fit(X_df, y, warm_start=True)
+            eff = ca.global_causal_effect(alpha=0.05)
+            eff = ca.local_causal_effect(X_df, alpha=0.05)
+
+    def test_empty_hinds(self):
+        for h_model in ['linear', 'forest']:
+            for classification in [True, False]:
+                X1 = np.random.normal(0, 1, size=(500, 5))
+                X2 = np.random.choice([0, 1], size=(500, 1))
+                X3 = np.random.choice([0, 1, 2], size=(500, 1))
+                X = np.hstack((X1, X2, X3))
+                X_df = pd.DataFrame(X, columns=[f"x{i}" for i in range(7)])
+                y = np.random.choice([0, 1], size=(500,))
+                y_df = pd.Series(y)
+                # model
+                hetero_inds = [[], [], []]
+                feat_inds = [1, 3, 5]
+                categorical = [5, 6]
+                ca = CausalAnalysis(feat_inds, categorical, heterogeneity_inds=hetero_inds,
+                                    classification=classification,
+                                    nuisance_models='linear', heterogeneity_model="linear", n_jobs=-1)
+                ca.fit(X_df, y)
+                eff = ca.global_causal_effect(alpha=0.05)
+                eff = ca.local_causal_effect(X_df, alpha=0.05)
diff --git a/econml/tests/test_inference.py b/econml/tests/test_inference.py
index e9019ce18..75c1ba2e9 100644
--- a/econml/tests/test_inference.py
+++ b/econml/tests/test_inference.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT License.
 
 import numpy as np
+import pandas as pd
 import unittest
 import pytest
 import pickle
@@ -413,6 +414,19 @@ def test_isolate_inferenceresult_from_estimator(self):
         new_coef = est.coef_
         np.testing.assert_array_equal(coef, new_coef)
 
+    def test_translate(self):
+        Y, T, X, W = TestInference.Y, TestInference.T, TestInference.X, TestInference.W
+        for offset in [10, pd.Series(np.arange(TestInference.X.shape[0]))]:
+            for inf in ['auto', BootstrapInference(n_bootstrap_samples=5)]:
+                est = LinearDML().fit(Y, T, X=X, W=W, inference=inf)
+                inf = est.const_marginal_effect_inference(X)
+                pred, bounds, summary = inf.point_estimate, inf.conf_int(), inf.summary_frame()
+                inf.translate(offset)
+                pred2, bounds2, summary2 = inf.point_estimate, inf.conf_int(), inf.summary_frame()
+                np.testing.assert_array_equal(pred + offset, pred2)
+                np.testing.assert_array_almost_equal(bounds[0] + offset, bounds2[0])
+                np.testing.assert_array_almost_equal(bounds[1] + offset, bounds2[1])
+
 class _NoFeatNamesEst:
     def __init__(self, cate_est):
         self.cate_est = clone(cate_est, safe=False)
diff --git a/setup.cfg b/setup.cfg
index e8bccee72..024a86b5a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -42,6 +42,7 @@ install_requires =
     pandas
     shap ~= 0.38.1
     dowhy
+    lightgbm
 test_suite = econml.tests
 tests_require =
     pytest
@@ -51,7 +52,6 @@ tests_require =
     nbconvert < 6
     nbformat
     seaborn
-    lightgbm
     xgboost
     tqdm
     jupyter-client <= 6.1.12
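
A minimal end-to-end sketch of the API this patch adds (illustrative only: the
synthetic data and column choices below are assumptions that mirror the new tests,
not part of the patch itself):

    import numpy as np
    from econml.solutions.causal_analysis import CausalAnalysis

    # two numeric features, one binary and one three-valued categorical
    X = np.hstack((np.random.normal(size=(500, 2)),
                   np.random.choice([0, 1], size=(500, 1)),
                   np.random.choice([0, 1, 2], size=(500, 1))))
    y = np.random.normal(size=(500,))

    ca = CausalAnalysis(feature_inds=[0, 1, 2, 3], categorical=[2, 3],
                        heterogeneity_inds=[0, 3], classification=False)
    ca.fit(X, y)

    glo = ca.global_causal_effect(alpha=0.1)   # one row per (feature, level)
    coh = ca.cohort_causal_effect(X[:10])      # average effects over a cohort
    loc = ca.local_causal_effect(X[:10])       # per-sample effects
    # counterfactual outcomes if feature 1 were set to 1 for the first 10 rows
    inf = ca.whatif(X[:10], np.ones(10), 1, y[:10])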