From b756bb40671563227ff30fd3dd033836a3abf508 Mon Sep 17 00:00:00 2001
From: Keith Battocchi
Date: Tue, 25 Feb 2020 17:52:47 -0500
Subject: [PATCH] Fix IntentToTreatDRIV feature names

---
 econml/ortho_iv.py            | 126 +++++++++++++++++++++++-----------
 econml/tests/test_ortho_iv.py |  49 ++++++-------
 2 files changed, 106 insertions(+), 69 deletions(-)

diff --git a/econml/ortho_iv.py b/econml/ortho_iv.py
index 3d0d94e2e..69f06d93c 100644
--- a/econml/ortho_iv.py
+++ b/econml/ortho_iv.py
@@ -718,6 +718,14 @@ class _BaseDRIV(_OrthoLearner):
     model_final : estimator
         model compatible with the sklearn regression API, used to fit the effect on X
 
+    featurizer : :term:`transformer`, optional, default None
+        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
+        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
+        If featurizer=None, then CATE is trained on X.
+
+    fit_cate_intercept : bool, optional, default True
+        Whether the linear CATE model should have a constant term.
+
     cov_clip : float, optional, default 0.1
         clipping of the covariate for regions with low "overlap", to reduce variance
 
@@ -762,6 +770,7 @@ def __init__(self,
                  nuisance_models,
                  model_final,
                  featurizer=None,
+                 fit_cate_intercept=True,
                  cov_clip=0.1,
                  opt_reweighted=False,
                  discrete_instrument=False, discrete_treatment=False, n_splits=2, random_state=None):
@@ -777,9 +786,21 @@ class ModelFinal:
         residual on residual regression.
         """
 
-        def __init__(self, model_final, featurizer):
+        def __init__(self, model_final, featurizer, fit_cate_intercept):
             self._model_final = clone(model_final, safe=False)
-            self._featurizer = clone(featurizer, safe=False)
+            self._fit_cate_intercept = fit_cate_intercept
+            self._original_featurizer = clone(featurizer, safe=False)
+            if self._fit_cate_intercept:
+                add_intercept = FunctionTransformer(lambda F:
+                                                    hstack([np.ones((F.shape[0], 1)), F]),
+                                                    validate=True)
+                if featurizer:
+                    self._featurizer = Pipeline([('featurize', self._original_featurizer),
+                                                 ('add_intercept', add_intercept)])
+                else:
+                    self._featurizer = add_intercept
+            else:
+                self._featurizer = self._original_featurizer
 
         @staticmethod
         def _effect_estimate(nuisances):
@@ -851,10 +872,14 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None
                 if sample_weight is not None:
                     return np.mean(np.average((Y_res - Y_res_pred)**2, weights=sample_weight, axis=0))
                 else:
-                    return np.mean((Y_res - Y_res_pred)**2)
+                    return np.mean((Y_res - Y_res_pred) ** 2)
+
+        self.fit_cate_intercept = fit_cate_intercept
+        self.bias_part_of_coef = fit_cate_intercept
 
         self.cov_clip = cov_clip
         self.opt_reweighted = opt_reweighted
-        super().__init__(nuisance_models, ModelFinal(model_final, featurizer),
+        super().__init__(nuisance_models, ModelFinal(model_final, featurizer, fit_cate_intercept),
                          discrete_instrument=discrete_instrument, discrete_treatment=discrete_treatment,
                          n_splits=n_splits, random_state=random_state)
@@ -888,6 +913,10 @@ def fit(self, Y, T, Z, X=None, *, sample_weight=None, sample_var=None, inference
         return super().fit(Y, T, X=X, W=None, Z=Z,
                            sample_weight=sample_weight, sample_var=sample_var, inference=inference)
 
+    @property
+    def original_featurizer(self):
+        return super().model_final._original_featurizer
+
     @property
     def featurizer(self):
         # NOTE This is used by the inference methods and has to be the overall featurizer. intended
@@ -899,6 +928,30 @@ def model_final(self):
         # NOTE This is used by the inference methods and is more for internal use to the library
         return super().model_final._model_final
 
+    def cate_feature_names(self, input_feature_names=None):
+        """
+        Get the output feature names.
+
+        Parameters
+        ----------
+        input_feature_names: list of strings of length X.shape[1] or None
+            The names of the input features
+
+        Returns
+        -------
+        out_feature_names: list of strings or None
+            The names of the output features :math:`\\phi(X)`, i.e. the features with respect to which the
+            final constant marginal CATE model is linear. These are the names associated with each entry
+            of the :meth:`coef_` parameter. Available only when the featurizer is None or has a
+            `get_feature_names(input_feature_names)` method; otherwise an AttributeError is raised.
+        """
+        if self.original_featurizer is None:
+            return input_feature_names
+        elif hasattr(self.original_featurizer, 'get_feature_names'):
+            return self.original_featurizer.get_feature_names(input_feature_names)
+        else:
+            raise AttributeError("Featurizer does not have a method: get_feature_names!")
+
 
 class _IntentToTreatDRIV(_BaseDRIV):
     """
@@ -909,6 +962,7 @@
     def __init__(self, model_Y_X, model_T_XZ,
                  prel_model_effect, model_effect,
                  featurizer=None,
+                 fit_cate_intercept=True,
                  cov_clip=.1,
                  n_splits=3,
                  opt_reweighted=False):
@@ -951,6 +1005,7 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
         # TODO: check that Y, T, Z do not have multiple columns
         super().__init__(ModelNuisance(model_Y_X, model_T_XZ, prel_model_effect),
                          model_effect, featurizer=featurizer,
+                         fit_cate_intercept=fit_cate_intercept,
                          cov_clip=cov_clip,
                          n_splits=n_splits,
                          discrete_instrument=True, discrete_treatment=True,
@@ -992,6 +1047,14 @@ class IntentToTreatDRIV(_IntentToTreatDRIV):
     final_model_effect : estimator, optional
         a final model for the CATE and projections. If None, then flexible_model_effect is also used as a
         final model
 
+    featurizer : :term:`transformer`, optional, default None
+        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
+        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
+        If featurizer=None, then CATE is trained on X.
+
+    fit_cate_intercept : bool, optional, default True
+        Whether the linear CATE model should have a constant term.
+
     cov_clip : float, optional, default 0.1
         clipping of the covariate for regions with low "overlap", to reduce variance
 
@@ -1025,6 +1088,7 @@ def __init__(self, model_Y_X, model_T_XZ,
                  flexible_model_effect,
                  final_model_effect=None,
                  featurizer=None,
+                 fit_cate_intercept=True,
                  cov_clip=.1,
                  n_splits=3,
                  opt_reweighted=False):
@@ -1040,10 +1104,19 @@ def __init__(self, model_Y_X, model_T_XZ,
         super().__init__(model_Y_X, model_T_XZ,
                          prel_model_effect,
                          final_model_effect,
                          featurizer=featurizer,
+                         fit_cate_intercept=fit_cate_intercept,
                          cov_clip=cov_clip,
                          n_splits=n_splits,
                          opt_reweighted=opt_reweighted)
 
+    @property
+    def models_Y_X(self):
+        return [mdl._model_Y_X._model for mdl in super().models_nuisance]
+
+    @property
+    def models_T_XZ(self):
+        return [mdl._model_T_XZ._model for mdl in super().models_nuisance]
+
 
 class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV):
     """
@@ -1060,6 +1133,14 @@ class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV):
     flexible_model_effect : estimator
         a flexible model for a preliminary version of the CATE, must accept sample_weight at fit time.
 
+    featurizer : :term:`transformer`, optional, default None
+        Must support fit_transform and transform. Used to create composite features in the final CATE regression.
+        It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
+        If featurizer=None, then CATE is trained on X.
+
+    fit_cate_intercept : bool, optional, default True
+        Whether the linear CATE model should have a constant term.
+
     cov_clip : float, optional, default 0.1
         clipping of the covariate for regions with low "overlap", to reduce variance
 
@@ -1096,21 +1177,10 @@ def __init__(self, model_Y_X, model_T_XZ,
                  cov_clip=.1,
                  n_splits=3,
                  opt_reweighted=False):
-        self.fit_cate_intercept = fit_cate_intercept
-        self.bias_part_of_coef = fit_cate_intercept
-        self.original_featurizer = clone(featurizer, safe=False)
-        if fit_cate_intercept:
-            add_intercept = FunctionTransformer(lambda F:
-                                                hstack([np.ones((F.shape[0], 1)), F]),
-                                                validate=True)
-            if self.original_featurizer:
-                featurizer = Pipeline([('featurize', self.original_featurizer),
-                                       ('add_intercept', add_intercept)])
-            else:
-                featurizer = add_intercept
         super().__init__(model_Y_X, model_T_XZ,
                          flexible_model_effect=flexible_model_effect,
                          featurizer=featurizer,
+                         fit_cate_intercept=fit_cate_intercept,
                          final_model_effect=StatsModelsLinearRegression(fit_intercept=False),
                          cov_clip=cov_clip, n_splits=n_splits,
                          opt_reweighted=opt_reweighted)
@@ -1143,27 +1213,3 @@ def fit(self, Y, T, Z, X=None, sample_weight=None, sample_var=None, inference=No
         self : instance
         """
         return super().fit(Y, T, Z, X=X, sample_weight=sample_weight, sample_var=sample_var, inference=inference)
-
-    def cate_feature_names(self, input_feature_names=None):
-        """
-        Get the output feature names.
-
-        Parameters
-        ----------
-        input_feature_names: list of strings of length X.shape[1] or None
-            The names of the input features
-
-        Returns
-        -------
-        out_feature_names: list of strings or None
-            The names of the output features :math:`\\phi(X)`, i.e. the features with respect to which the
-            final constant marginal CATE model is linear. It is the names of the features that are associated
-            with each entry of the :meth:`coef_` parameter. Not available when the featurizer is not None and
-            does not have a method: `get_feature_names(input_feature_names)`. Otherwise None is returned.
- """ - if self.original_featurizer is None: - return input_feature_names - elif hasattr(self.original_featurizer, 'get_feature_names'): - return self.original_featurizer.get_feature_names(input_feature_names) - else: - raise AttributeError("Featurizer does not have a method: get_feature_names!") diff --git a/econml/tests/test_ortho_iv.py b/econml/tests/test_ortho_iv.py index 1646867d3..cfe57930a 100644 --- a/econml/tests/test_ortho_iv.py +++ b/econml/tests/test_ortho_iv.py @@ -10,7 +10,7 @@ from econml.ortho_iv import (DMLATEIV, ProjectedDMLATEIV, DMLIV, NonParamDMLIV, IntentToTreatDRIV, LinearIntentToTreatDRIV) import numpy as np -from econml.utilities import shape, hstack, vstack, reshape, cross_product +from econml.utilities import shape, hstack, vstack, reshape, cross_product, StatsModelsLinearRegression from econml.inference import BootstrapInference from contextlib import ExitStack from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier @@ -265,45 +265,36 @@ def test_multidim_arrays_fail(self): with pytest.raises(AttributeError): est.fit(Y, T=two_class, Z=three_class) - # TODO: make IV related def test_access_to_internal_models(self): """ Test that API related to accessing the nuisance models, cate_model and featurizer is working. """ - from econml.dml import DMLCateEstimator - - Y = np.array([2, 3, 1, 3, 2, 1, 1, 1]) - T = np.array([3, 2, 1, 2, 1, 2, 1, 3]) - X = np.ones((8, 1)) - est = DMLCateEstimator(model_y=WeightedLasso(), - model_t=LogisticRegression(), - model_final=WeightedLasso(), - featurizer=PolynomialFeatures(degree=2, include_bias=False), - fit_cate_intercept=True, - discrete_treatment=True) - est.fit(Y, T, X) + est = LinearIntentToTreatDRIV(LinearRegression(), LogisticRegression(C=1000), WeightedLasso(), + featurizer=PolynomialFeatures(degree=2, include_bias=False)) + Y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]) + T = np.array([1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]) + Z = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]) + X = np.array([1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]).reshape(-1, 1) + est.fit(Y, T, Z, X=X) assert isinstance(est.original_featurizer, PolynomialFeatures) assert isinstance(est.featurizer, Pipeline) - assert isinstance(est.model_cate, WeightedLasso) - for mdl in est.models_y: - assert isinstance(mdl, WeightedLasso) - for mdl in est.models_t: + assert isinstance(est.model_final, StatsModelsLinearRegression) + for mdl in est.models_Y_X: + assert isinstance(mdl, LinearRegression) + for mdl in est.models_T_XZ: assert isinstance(mdl, LogisticRegression) np.testing.assert_array_equal(est.cate_feature_names(['A']), ['A', 'A^2']) np.testing.assert_array_equal(est.cate_feature_names(), ['x0', 'x0^2']) - est = DMLCateEstimator(model_y=WeightedLasso(), - model_t=LogisticRegression(), - model_final=WeightedLasso(), - featurizer=None, - fit_cate_intercept=True, - discrete_treatment=True) - est.fit(Y, T, X) + + est = LinearIntentToTreatDRIV(LinearRegression(), LogisticRegression(C=1000), WeightedLasso(), + featurizer=None) + est.fit(Y, T, Z, X=X) assert est.original_featurizer is None assert isinstance(est.featurizer, FunctionTransformer) - assert isinstance(est.model_cate, WeightedLasso) - for mdl in est.models_y: - assert isinstance(mdl, WeightedLasso) - for mdl in est.models_t: + assert isinstance(est.model_final, StatsModelsLinearRegression) + for mdl in est.models_Y_X: + assert isinstance(mdl, LinearRegression) + for mdl in est.models_T_XZ: assert isinstance(mdl, LogisticRegression) 
+        np.testing.assert_array_equal(est.cate_feature_names(['A']), ['A'])
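
--
Reviewer note (not part of the patch): a minimal usage sketch of the API this
change consolidates. `cate_feature_names` now lives on `_BaseDRIV`, with the
intercept handling moved into its `ModelFinal`, so `IntentToTreatDRIV` and
`LinearIntentToTreatDRIV` both report names for the original featurizer. The
simulated data and model choices below are illustrative assumptions mirroring
the new test, not taken from the commit.

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from sklearn.preprocessing import PolynomialFeatures
    from econml.ortho_iv import LinearIntentToTreatDRIV

    # Toy intent-to-treat setup: Z is the random assignment, T the realized
    # treatment under imperfect compliance, true CATE is 1 + 0.5 * x.
    rng = np.random.RandomState(0)
    X = rng.normal(size=(500, 1))
    Z = rng.binomial(1, 0.5, size=500)
    T = Z * rng.binomial(1, 0.7, size=500)
    Y = (1 + 0.5 * X[:, 0]) * T + rng.normal(size=500)

    est = LinearIntentToTreatDRIV(model_Y_X=LinearRegression(),
                                  model_T_XZ=LogisticRegression(),
                                  flexible_model_effect=GradientBoostingRegressor(),
                                  featurizer=PolynomialFeatures(degree=2, include_bias=False))
    est.fit(Y, T, Z, X=X)

    # Names are reported for the original featurizer; the intercept column is
    # appended internally by ModelFinal and excluded from the feature names.
    print(est.cate_feature_names(['x']))   # ['x', 'x^2']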