support multi treatment in meta learners #141

Merged: 12 commits, Nov 13, 2019

econml/metalearners.py (127 changes: 64 additions & 63 deletions)
@@ -25,7 +25,7 @@ class TLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.

"""
@@ -42,7 +42,7 @@ def __init__(self, models):
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of TLearner.

Parameters
@@ -70,11 +70,10 @@ def fit(self, Y, T, X, inference=None):
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)

for ind in range(self._d_t):
for ind in range(self._d_t + 1):
self.models[ind].fit(X[T == ind], Y[T == ind])

def const_marginal_effect(self, X):
@@ -87,17 +86,17 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
# Check inputs
X = check_array(X)
taus = []
for ind in range(1, self._d_t):
taus.append(self.models[ind].predict(X) - self.models[0].predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
for ind in range(self._d_t):
taus.append(self.models[ind + 1].predict(X) - self.models[0].predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
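
For context, a minimal usage sketch of the multi-treatment TLearner API that this hunk enables, assuming the post-PR econml.metalearners module and scikit-learn regressors (the data and model choices are illustrative only):

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from econml.metalearners import TLearner

# Toy data with three treatment arms: 0 = control, plus arms 1 and 2.
n = 1000
X = np.random.normal(size=(n, 5))
T = np.random.choice([0, 1, 2], size=n)
Y = X[:, 0] * T + np.random.normal(size=n)

# One outcome model per arm, control included; per the revised docstring,
# a single estimator would be cloned across arms by check_models.
est = TLearner(models=[GradientBoostingRegressor() for _ in range(3)])
est.fit(Y, T, X)

# _d_t is now the number of non-control arms (2 here); with vector Y the
# d_y dimension collapses, so the CATE matrix has shape (m, d_t).
tau = est.const_marginal_effect(X[:10])
assert tau.shape == (10, 2)
```
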
@@ -126,7 +125,7 @@ def __init__(self, overall_model):
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X=None, *, inference=None):
"""Build an instance of SLearner.

Parameters
@@ -138,7 +137,7 @@ def fit(self, Y, T, X, inference=None):
Treatment policy. Only binary treatments are accepted as input.
T will be flattened if shape is (n, 1).

X : array-like, shape (n, d_x)
X : array-like, shape (n, d_x), optional
Feature vector that captures heterogeneity.

inference: string, `Inference` instance, or None
@@ -150,39 +149,42 @@
self : an instance of self.
"""
# Check inputs
if X is None:
X = np.ones((Y.shape[0], 1))
Collaborator:
[minor] I think that in this case, the default could be a 0-column array rather than a column of ones (the columns from T will still be there):

Suggested change:
-            X = np.ones((Y.shape[0], 1))
+            X = np.empty((Y.shape[0], 0))

Collaborator:
On the other hand, maybe it's silly to even allow X=None, because there is no W (unlike in DML).

Contributor (author):
Yes. In the SLearner setting, X=None is the same as learning the difference of mean(Y) in each class. I will keep it for now.

Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))
self._d_t = T.shape[1:]
self._d_y = Y.shape[1:]
self._d_t = T.shape[1] - 1
feat_arr = np.concatenate((X, T), axis=1)
self.overall_model.fit(feat_arr, Y)
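
The const_marginal_effect below leans on econml's broadcast_unit_treatments helper. As a reading aid, here is a hypothetical numpy restatement of the behavior the reshape logic assumes (each row of X repeated once per arm, paired with a one-hot treatment block); this is a sketch, not econml's actual implementation:

```python
import numpy as np

def broadcast_unit_treatments_sketch(X, d_t):
    # Repeat each row of X d_t times: x1, x1, ..., x2, x2, ...
    Xs = np.repeat(X, d_t, axis=0)
    # Cycle the d_t one-hot treatment vectors for every original row.
    Ts = np.tile(np.eye(d_t), (X.shape[0], 1))
    return Xs, Ts

Xs, Ts = broadcast_unit_treatments_sketch(np.array([[1.0], [2.0]]), 3)
# Xs.shape == (6, 1); Ts.shape == (6, 3); reshaping the stacked
# predictions to (-1, 3) then recovers one row per original sample.
```
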

def const_marginal_effect(self, X):
def const_marginal_effect(self, X=None):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.

Parameters
----------
X : matrix, shape (m × dₓ)
X : matrix, shape (m × dₓ), optional
Matrix of features for each sample.

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
# Check inputs
if X is None:
X = np.ones((1, 1))
X = check_array(X)
Xs, Ts = broadcast_unit_treatments(X, self._d_t[0])
Xs, Ts = broadcast_unit_treatments(X, self._d_t + 1)
feat_arr = np.concatenate((Xs, Ts), axis=1)
prediction = self.overall_model.predict(feat_arr).reshape((-1,) + self._d_t + self._d_y)
prediction = self.overall_model.predict(feat_arr).reshape((-1, self._d_t + 1,) + self._d_y)
if self._d_y:
prediction = transpose(prediction, (0, 2, 1))
taus = (prediction - np.repeat(prediction[:, :, 0], self._d_t[0]).reshape(prediction.shape))[:, :, 1:]
taus = (prediction - np.repeat(prediction[:, :, 0], self._d_t + 1).reshape(prediction.shape))[:, :, 1:]
else:
taus = (prediction - np.repeat(prediction[:, 0], self._d_t[0]).reshape(prediction.shape))[:, 1:]
taus = (prediction - np.repeat(prediction[:, 0], self._d_t + 1).reshape(prediction.shape))[:, 1:]
return taus
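
To make the X=None discussion in the review thread above concrete: with no features, the SLearner (as written, with the column-of-ones default) regresses Y on the one-hot treatment encoding, so each CATE column estimates a difference of arm means. A sketch under that assumption:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from econml.metalearners import SLearner

n = 1000
T = np.random.choice([0, 1, 2], size=n)
Y = 2.0 * (T == 1) + 5.0 * (T == 2) + np.random.normal(size=n)

est = SLearner(overall_model=LinearRegression())
est.fit(Y, T, X=None)  # X defaults to np.ones((n, 1)) internally

# One row for the single all-ones "unit", one column per non-control arm;
# approximately [[2.0, 5.0]] here.
print(est.const_marginal_effect(X=None))
```
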


@@ -194,13 +196,13 @@ class XLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.

cate_models : estimator for pseudo-treatment effects on control and treatments
It can be a single estimator applied to all the control and treatments or a tuple/list of
estimators with the same number of treatments (include control).
If None, it will be same models with outcome estimators.
estimators with one estimator per treatment (including control).
If None, it will be the same models as the outcome estimators.
Must implement `fit` and `predict` methods.

propensity_model : estimator for the propensity function
@@ -224,7 +226,7 @@ def __init__(self, models,
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of XLearner.

Parameters
@@ -251,31 +253,30 @@
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)
if self.cate_models is None:
self.cate_models = self.models
else:
self.cate_models = check_models(self.cate_models, self._d_t)
self.cate_models = check_models(self.cate_models, self._d_t + 1)
self.propensity_models = []
self.cate_treated_models = []
self.cate_controls_models = []

# Estimate response function
for ind in range(self._d_t):
for ind in range(self._d_t + 1):
self.models[ind].fit(X[T == ind], Y[T == ind])
for ind in range(1, self._d_t):
self.cate_treated_models.append(clone(self.cate_models[ind], safe=False))
for ind in range(self._d_t):
self.cate_treated_models.append(clone(self.cate_models[ind + 1], safe=False))
self.cate_controls_models.append(clone(self.cate_models[0], safe=False))
self.propensity_models.append(clone(self.propensity_model, safe=False))
imputed_effect_on_controls = self.models[ind].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind] - self.models[0].predict(X[T == ind])
self.cate_controls_models[ind - 1].fit(X[T == 0], imputed_effect_on_controls)
self.cate_treated_models[ind - 1].fit(X[T == ind], imputed_effect_on_treated)
X_concat = np.concatenate((X[T == 0], X[T == ind]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind]), axis=0)
self.propensity_models[ind - 1].fit(X_concat, T_concat)
imputed_effect_on_controls = self.models[ind + 1].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind + 1] - self.models[0].predict(X[T == ind + 1])
self.cate_controls_models[ind].fit(X[T == 0], imputed_effect_on_controls)
self.cate_treated_models[ind].fit(X[T == ind + 1], imputed_effect_on_treated)
X_concat = np.concatenate((X[T == 0], X[T == ind + 1]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind + 1]), axis=0)
self.propensity_models[ind].fit(X_concat, T_concat)

def const_marginal_effect(self, X):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.
@@ -287,20 +288,20 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
X = check_array(X)
m = X.shape[0]
taus = []
for ind in range(self._d_t - 1):
for ind in range(self._d_t):
propensity_scores = self.propensity_models[ind].predict_proba(X)[:, 1:]
tau_hat = propensity_scores * self.cate_controls_models[ind].predict(X).reshape(m, -1) \
+ (1 - propensity_scores) * self.cate_treated_models[ind].predict(X).reshape(m, -1)
taus.append(tau_hat)
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
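
As a cross-check on the re-indexed fit loop above, a plain scikit-learn sketch of what one iteration computes for a single non-control arm: impute effects on each group from the fitted response models, fit per-group CATE models, and blend them with the propensity score g(x) = P(T = arm | X = x, T in {0, arm}). The helper name is hypothetical:

```python
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

def fit_xlearner_arm(X, T, Y, arm, response_models, cate_model):
    """Hypothetical helper mirroring one iteration of XLearner.fit for a
    non-control arm (arm >= 1); response_models are already fitted."""
    ctrl, trt = (T == 0), (T == arm)
    # Imputed treatment effects on each group.
    d_ctrl = response_models[arm].predict(X[ctrl]) - Y[ctrl]
    d_trt = Y[trt] - response_models[0].predict(X[trt])
    # CATE models fitted on the imputed effects.
    tau_ctrl = clone(cate_model).fit(X[ctrl], d_ctrl)
    tau_trt = clone(cate_model).fit(X[trt], d_trt)
    # Propensity of receiving `arm` among units in {control, arm}.
    X2 = np.concatenate((X[ctrl], X[trt]), axis=0)
    T2 = np.concatenate((T[ctrl], T[trt]), axis=0)
    g = LogisticRegression().fit(X2, T2)

    def cate(X_new):
        m = X_new.shape[0]
        p = g.predict_proba(X_new)[:, 1:]  # P(T = arm | X)
        return (p * tau_ctrl.predict(X_new).reshape(m, -1)
                + (1 - p) * tau_trt.predict(X_new).reshape(m, -1))

    return cate
```
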
@@ -314,13 +315,13 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.
The `fit` method must accept the `sample_weight` parameter.

final_models : estimators for pseudo-treatment effects for each treatment
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (exclude control).
estimators with one estimator per treatment (excluding control).
Must implement `fit` and `predict` methods.

propensity_model : estimator for the propensity function
@@ -345,7 +346,7 @@ def __init__(self, models,
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of DomainAdaptationLearner.

Parameters
@@ -372,36 +373,36 @@
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self.final_models = check_models(self.final_models, self._d_t - 1)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)
self.final_models = check_models(self.final_models, self._d_t)
self.propensity_models = []
self.models_control = []
self.models_treated = []
for ind in range(1, self._d_t):
for ind in range(self._d_t):
self.models_control.append(clone(self.models[0], safe=False))
self.models_treated.append(clone(self.models[ind], safe=False))
self.models_treated.append(clone(self.models[ind + 1], safe=False))
self.propensity_models.append(clone(self.propensity_model, safe=False))

X_concat = np.concatenate((X[T == 0], X[T == ind]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind]), axis=0)
self.propensity_models[ind - 1].fit(X_concat, T_concat)
pro_scores = self.propensity_models[ind - 1].predict_proba(X_concat)[:, 1]
X_concat = np.concatenate((X[T == 0], X[T == ind + 1]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind + 1]), axis=0)
self.propensity_models[ind].fit(X_concat, T_concat)
pro_scores = self.propensity_models[ind].predict_proba(X_concat)[:, 1]

# Train model on controls. Assign higher weight to units resembling
# treated units.
self._fit_weighted_pipeline(self.models_control[ind - 1], X[T == 0], Y[T == 0],
self._fit_weighted_pipeline(self.models_control[ind], X[T == 0], Y[T == 0],
sample_weight=pro_scores[T_concat == 0] / (1 - pro_scores[T_concat == 0]))
# Train model on the treated. Assign higher weight to units resembling
# control units.
self._fit_weighted_pipeline(self.models_treated[ind - 1], X[T == ind], Y[T == ind],
sample_weight=(1 - pro_scores[T_concat == ind]) / pro_scores[T_concat == ind])
imputed_effect_on_controls = self.models_treated[ind - 1].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind] - self.models_control[ind - 1].predict(X[T == ind])
self._fit_weighted_pipeline(self.models_treated[ind], X[T == ind + 1], Y[T == ind + 1],
sample_weight=(1 - pro_scores[T_concat == ind + 1]) /
pro_scores[T_concat == ind + 1])
imputed_effect_on_controls = self.models_treated[ind].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind + 1] - self.models_control[ind].predict(X[T == ind + 1])

imputed_effects_concat = np.concatenate((imputed_effect_on_controls, imputed_effect_on_treated), axis=0)
self.final_models[ind - 1].fit(X_concat, imputed_effects_concat)
self.final_models[ind].fit(X_concat, imputed_effects_concat)

def const_marginal_effect(self, X):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.
@@ -413,7 +414,7 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
@@ -422,7 +423,7 @@
taus = []
for model in self.final_models:
taus.append(model.predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
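
Finally, an end-to-end usage sketch of the multi-treatment DomainAdaptationLearner, assuming the post-PR constructor documented above and outcome models whose fit accepts sample_weight:

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from econml.metalearners import DomainAdaptationLearner

n = 2000
X = np.random.normal(size=(n, 4))
T = np.random.choice([0, 1, 2], size=n)
Y = ((1.0 + X[:, 0]) * (T == 1) + (2.0 - X[:, 1]) * (T == 2)
     + np.random.normal(size=n))

est = DomainAdaptationLearner(
    models=GradientBoostingRegressor(),        # cloned across all 3 arms
    final_models=GradientBoostingRegressor(),  # cloned across the 2 non-control arms
    propensity_model=LogisticRegression())
est.fit(Y, T, X)

# Controls are re-weighted by the odds p/(1-p) toward the treated domain
# (and vice versa) before the final CATE models are fit.
tau = est.const_marginal_effect(X[:5])  # shape (5, 2)
```
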