support multi treatment in meta learners #141

Merged: 12 commits, Nov 13, 2019

econml/metalearners.py (127 changes: 64 additions & 63 deletions)
@@ -25,7 +25,7 @@ class TLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.

"""
@@ -42,7 +42,7 @@ def __init__(self, models):
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of TLearner.

Parameters
@@ -70,11 +70,10 @@ def fit(self, Y, T, X, inference=None):
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)

for ind in range(self._d_t):
for ind in range(self._d_t + 1):
self.models[ind].fit(X[T == ind], Y[T == ind])

def const_marginal_effect(self, X):
@@ -87,17 +86,17 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
# Check inputs
X = check_array(X)
taus = []
for ind in range(1, self._d_t):
taus.append(self.models[ind].predict(X) - self.models[0].predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
for ind in range(self._d_t):
taus.append(self.models[ind + 1].predict(X) - self.models[0].predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
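
For context, a minimal usage sketch of the multi-treatment TLearner API that this hunk enables, assuming the post-PR econml.metalearners module and scikit-learn regressors (the data and model choices are illustrative only):

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from econml.metalearners import TLearner

# Toy data with three treatment arms: 0 = control, plus arms 1 and 2.
n = 1000
X = np.random.normal(size=(n, 5))
T = np.random.choice([0, 1, 2], size=n)
Y = X[:, 0] * T + np.random.normal(size=n)

# One outcome model per arm, control included; per the revised docstring,
# a single estimator would be cloned across arms by check_models.
est = TLearner(models=[GradientBoostingRegressor() for _ in range(3)])
est.fit(Y, T, X)

# _d_t is now the number of non-control arms (2 here); with vector Y the
# d_y dimension collapses, so the CATE matrix has shape (m, d_t).
tau = est.const_marginal_effect(X[:10])
assert tau.shape == (10, 2)
```
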
@@ -126,7 +125,7 @@ def __init__(self, overall_model):
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X=None, *, inference=None):
"""Build an instance of SLearner.

Parameters
@@ -138,7 +137,7 @@ def fit(self, Y, T, X, inference=None):
Treatment policy. Only binary treatments are accepted as input.
T will be flattened if shape is (n, 1).

X : array-like, shape (n, d_x)
X : array-like, shape (n, d_x), optional
Feature vector that captures heterogeneity.

inference: string, `Inference` instance, or None
@@ -150,39 +149,42 @@
self : an instance of self.
"""
# Check inputs
if X is None:
X = np.ones((Y.shape[0], 1))
Collaborator:
[minor] I think that in this case, the default could be a 0-column array rather than a column of ones (the columns from T will still be there):

Suggested change:
-            X = np.ones((Y.shape[0], 1))
+            X = np.empty((Y.shape[0], 0))

Collaborator:
On the other hand, maybe it's silly to even allow X=None, because there is no W (unlike in DML).

Contributor (author):
Yes. In the SLearner setting, X=None is the same as learning the difference of mean(Y) in each class. I will keep it for now.

Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))
self._d_t = T.shape[1:]
self._d_y = Y.shape[1:]
self._d_t = T.shape[1] - 1
feat_arr = np.concatenate((X, T), axis=1)
self.overall_model.fit(feat_arr, Y)
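
The const_marginal_effect below leans on econml's broadcast_unit_treatments helper. As a reading aid, here is a hypothetical numpy restatement of the behavior the reshape logic assumes (each row of X repeated once per arm, paired with a one-hot treatment block); this is a sketch, not econml's actual implementation:

```python
import numpy as np

def broadcast_unit_treatments_sketch(X, d_t):
    # Repeat each row of X d_t times: x1, x1, ..., x2, x2, ...
    Xs = np.repeat(X, d_t, axis=0)
    # Cycle the d_t one-hot treatment vectors for every original row.
    Ts = np.tile(np.eye(d_t), (X.shape[0], 1))
    return Xs, Ts

Xs, Ts = broadcast_unit_treatments_sketch(np.array([[1.0], [2.0]]), 3)
# Xs.shape == (6, 1); Ts.shape == (6, 3); reshaping the stacked
# predictions to (-1, 3) then recovers one row per original sample.
```
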

def const_marginal_effect(self, X):
def const_marginal_effect(self, X=None):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.

Parameters
----------
X : matrix, shape (m × dₓ)
X : matrix, shape (m × dₓ), optional
Matrix of features for each sample.

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
# Check inputs
if X is None:
X = np.ones((1, 1))
X = check_array(X)
Xs, Ts = broadcast_unit_treatments(X, self._d_t[0])
Xs, Ts = broadcast_unit_treatments(X, self._d_t + 1)
feat_arr = np.concatenate((Xs, Ts), axis=1)
prediction = self.overall_model.predict(feat_arr).reshape((-1,) + self._d_t + self._d_y)
prediction = self.overall_model.predict(feat_arr).reshape((-1, self._d_t + 1,) + self._d_y)
if self._d_y:
prediction = transpose(prediction, (0, 2, 1))
taus = (prediction - np.repeat(prediction[:, :, 0], self._d_t[0]).reshape(prediction.shape))[:, :, 1:]
taus = (prediction - np.repeat(prediction[:, :, 0], self._d_t + 1).reshape(prediction.shape))[:, :, 1:]
else:
taus = (prediction - np.repeat(prediction[:, 0], self._d_t[0]).reshape(prediction.shape))[:, 1:]
taus = (prediction - np.repeat(prediction[:, 0], self._d_t + 1).reshape(prediction.shape))[:, 1:]
return taus
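
To make the X=None discussion in the review thread above concrete: with no features, the SLearner (as written, with the column-of-ones default) regresses Y on the one-hot treatment encoding, so each CATE column estimates a difference of arm means. A sketch under that assumption:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from econml.metalearners import SLearner

n = 1000
T = np.random.choice([0, 1, 2], size=n)
Y = 2.0 * (T == 1) + 5.0 * (T == 2) + np.random.normal(size=n)

est = SLearner(overall_model=LinearRegression())
est.fit(Y, T, X=None)  # X defaults to np.ones((n, 1)) internally

# One row for the single all-ones "unit", one column per non-control arm;
# approximately [[2.0, 5.0]] here.
print(est.const_marginal_effect(X=None))
```
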


@@ -194,13 +196,13 @@ class XLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.

cate_models : estimator for pseudo-treatment effects on control and treatments
It can be a single estimator applied to all the control and treatments or a tuple/list of
estimators with the same number of treatments (include control).
If None, it will be same models with outcome estimators.
estimators with one estimator per treatment (including control).
If None, it will be the same models as the outcome estimators.
Must implement `fit` and `predict` methods.

propensity_model : estimator for the propensity function
@@ -224,7 +226,7 @@ def __init__(self, models,
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of XLearner.

Parameters
@@ -251,31 +253,30 @@
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)
if self.cate_models is None:
self.cate_models = self.models
else:
self.cate_models = check_models(self.cate_models, self._d_t)
self.cate_models = check_models(self.cate_models, self._d_t + 1)
self.propensity_models = []
self.cate_treated_models = []
self.cate_controls_models = []

# Estimate response function
for ind in range(self._d_t):
for ind in range(self._d_t + 1):
self.models[ind].fit(X[T == ind], Y[T == ind])
for ind in range(1, self._d_t):
self.cate_treated_models.append(clone(self.cate_models[ind], safe=False))
for ind in range(self._d_t):
self.cate_treated_models.append(clone(self.cate_models[ind + 1], safe=False))
self.cate_controls_models.append(clone(self.cate_models[0], safe=False))
self.propensity_models.append(clone(self.propensity_model, safe=False))
imputed_effect_on_controls = self.models[ind].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind] - self.models[0].predict(X[T == ind])
self.cate_controls_models[ind - 1].fit(X[T == 0], imputed_effect_on_controls)
self.cate_treated_models[ind - 1].fit(X[T == ind], imputed_effect_on_treated)
X_concat = np.concatenate((X[T == 0], X[T == ind]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind]), axis=0)
self.propensity_models[ind - 1].fit(X_concat, T_concat)
imputed_effect_on_controls = self.models[ind + 1].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind + 1] - self.models[0].predict(X[T == ind + 1])
self.cate_controls_models[ind].fit(X[T == 0], imputed_effect_on_controls)
self.cate_treated_models[ind].fit(X[T == ind + 1], imputed_effect_on_treated)
X_concat = np.concatenate((X[T == 0], X[T == ind + 1]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind + 1]), axis=0)
self.propensity_models[ind].fit(X_concat, T_concat)

def const_marginal_effect(self, X):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.
@@ -287,20 +288,20 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
"""
X = check_array(X)
m = X.shape[0]
taus = []
for ind in range(self._d_t - 1):
for ind in range(self._d_t):
propensity_scores = self.propensity_models[ind].predict_proba(X)[:, 1:]
tau_hat = propensity_scores * self.cate_controls_models[ind].predict(X).reshape(m, -1) \
+ (1 - propensity_scores) * self.cate_treated_models[ind].predict(X).reshape(m, -1)
taus.append(tau_hat)
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
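
As a cross-check on the re-indexed fit loop above, a plain scikit-learn sketch of what one iteration computes for a single non-control arm: impute effects on each group from the fitted response models, fit per-group CATE models, and blend them with the propensity score g(x) = P(T = arm | X = x, T in {0, arm}). The helper name is hypothetical:

```python
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

def fit_xlearner_arm(X, T, Y, arm, response_models, cate_model):
    """Hypothetical helper mirroring one iteration of XLearner.fit for a
    non-control arm (arm >= 1); response_models are already fitted."""
    ctrl, trt = (T == 0), (T == arm)
    # Imputed treatment effects on each group.
    d_ctrl = response_models[arm].predict(X[ctrl]) - Y[ctrl]
    d_trt = Y[trt] - response_models[0].predict(X[trt])
    # CATE models fitted on the imputed effects.
    tau_ctrl = clone(cate_model).fit(X[ctrl], d_ctrl)
    tau_trt = clone(cate_model).fit(X[trt], d_trt)
    # Propensity of receiving `arm` among units in {control, arm}.
    X2 = np.concatenate((X[ctrl], X[trt]), axis=0)
    T2 = np.concatenate((T[ctrl], T[trt]), axis=0)
    g = LogisticRegression().fit(X2, T2)

    def cate(X_new):
        m = X_new.shape[0]
        p = g.predict_proba(X_new)[:, 1:]  # P(T = arm | X)
        return (p * tau_ctrl.predict(X_new).reshape(m, -1)
                + (1 - p) * tau_trt.predict(X_new).reshape(m, -1))

    return cate
```
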
@@ -314,13 +315,13 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
----------
models : outcome estimators for both control units and treatment units
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (include control).
estimators with one estimator per treatment (including control).
Must implement `fit` and `predict` methods.
The `fit` method must accept the `sample_weight` parameter.

final_models : estimators for pseudo-treatment effects for each treatment
It can be a single estimator applied to all the control and treatment units or a tuple/list of
estimators with the same number of treatments (exclude control).
estimators with one estimator per treatment (excluding control).
Must implement `fit` and `predict` methods.

propensity_model : estimator for the propensity function
@@ -345,7 +346,7 @@ def __init__(self, models,
super().__init__()

@BaseCateEstimator._wrap_fit
def fit(self, Y, T, X, inference=None):
def fit(self, Y, T, X, *, inference=None):
"""Build an instance of DomainAdaptationLearner.

Parameters
@@ -372,36 +373,36 @@
Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
T = self._label_encoder.fit_transform(T)
self._one_hot_encoder.fit(T.reshape(-1, 1))
self._d_t = len(self._label_encoder.classes_)
self._d_y = Y.shape[1:]
self.models = check_models(self.models, self._d_t)
self.final_models = check_models(self.final_models, self._d_t - 1)
self._d_t = len(self._label_encoder.classes_) - 1
self.models = check_models(self.models, self._d_t + 1)
self.final_models = check_models(self.final_models, self._d_t)
self.propensity_models = []
self.models_control = []
self.models_treated = []
for ind in range(1, self._d_t):
for ind in range(self._d_t):
self.models_control.append(clone(self.models[0], safe=False))
self.models_treated.append(clone(self.models[ind], safe=False))
self.models_treated.append(clone(self.models[ind + 1], safe=False))
self.propensity_models.append(clone(self.propensity_model, safe=False))

X_concat = np.concatenate((X[T == 0], X[T == ind]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind]), axis=0)
self.propensity_models[ind - 1].fit(X_concat, T_concat)
pro_scores = self.propensity_models[ind - 1].predict_proba(X_concat)[:, 1]
X_concat = np.concatenate((X[T == 0], X[T == ind + 1]), axis=0)
T_concat = np.concatenate((T[T == 0], T[T == ind + 1]), axis=0)
self.propensity_models[ind].fit(X_concat, T_concat)
pro_scores = self.propensity_models[ind].predict_proba(X_concat)[:, 1]

# Train model on controls. Assign higher weight to units resembling
# treated units.
self._fit_weighted_pipeline(self.models_control[ind - 1], X[T == 0], Y[T == 0],
self._fit_weighted_pipeline(self.models_control[ind], X[T == 0], Y[T == 0],
sample_weight=pro_scores[T_concat == 0] / (1 - pro_scores[T_concat == 0]))
# Train model on the treated. Assign higher weight to units resembling
# control units.
self._fit_weighted_pipeline(self.models_treated[ind - 1], X[T == ind], Y[T == ind],
sample_weight=(1 - pro_scores[T_concat == ind]) / pro_scores[T_concat == ind])
imputed_effect_on_controls = self.models_treated[ind - 1].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind] - self.models_control[ind - 1].predict(X[T == ind])
self._fit_weighted_pipeline(self.models_treated[ind], X[T == ind + 1], Y[T == ind + 1],
sample_weight=(1 - pro_scores[T_concat == ind + 1]) /
pro_scores[T_concat == ind + 1])
imputed_effect_on_controls = self.models_treated[ind].predict(X[T == 0]) - Y[T == 0]
imputed_effect_on_treated = Y[T == ind + 1] - self.models_control[ind].predict(X[T == ind + 1])

imputed_effects_concat = np.concatenate((imputed_effect_on_controls, imputed_effect_on_treated), axis=0)
self.final_models[ind - 1].fit(X_concat, imputed_effects_concat)
self.final_models[ind].fit(X_concat, imputed_effects_concat)

def const_marginal_effect(self, X):
"""Calculate the constant marginal treatment effect on a vector of features for each sample.
@@ -413,7 +414,7 @@ def const_marginal_effect(self, X):

Returns
-------
τ_hat : matrix, shape (m, d_y, n_t-1), n_t is number of treatments
τ_hat : matrix, shape (m, d_y, d_t)
Constant marginal CATE of each treatment on each outcome for each sample X[i].
Note that when Y is a vector rather than a 2-dimensional array,
the corresponding singleton dimensions in the output will be collapsed
@@ -422,7 +423,7 @@
taus = []
for model in self.final_models:
taus.append(model.predict(X))
taus = np.column_stack(taus).reshape((-1, self._d_t - 1,) + self._d_y) # shape as of m*d_t*d_y
taus = np.column_stack(taus).reshape((-1, self._d_t,) + self._d_y)  # shape: (m, d_t, d_y)
if self._d_y:
taus = transpose(taus, (0, 2, 1))  # shape: (m, d_y, d_t)
return taus
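
Finally, an end-to-end usage sketch of the multi-treatment DomainAdaptationLearner, assuming the post-PR constructor documented above and outcome models whose fit accepts sample_weight:

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from econml.metalearners import DomainAdaptationLearner

n = 2000
X = np.random.normal(size=(n, 4))
T = np.random.choice([0, 1, 2], size=n)
Y = ((1.0 + X[:, 0]) * (T == 1) + (2.0 - X[:, 1]) * (T == 2)
     + np.random.normal(size=n))

est = DomainAdaptationLearner(
    models=GradientBoostingRegressor(),        # cloned across all 3 arms
    final_models=GradientBoostingRegressor(),  # cloned across the 2 non-control arms
    propensity_model=LogisticRegression())
est.fit(Y, T, X)

# Controls are re-weighted by the odds p/(1-p) toward the treated domain
# (and vice versa) before the final CATE models are fit.
tau = est.const_marginal_effect(X[:5])  # shape (5, 2)
```
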