some small fixes to the debiased lasso #358

Merged
merged 17 commits on Jan 9, 2021
20 changes: 20 additions & 0 deletions econml/dml/dml.py
@@ -653,6 +653,18 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
CATE L1 regularization applied through the debiased lasso in the final model.
'auto' corresponds to a CV form of the :class:`MultiOutputDebiasedLasso`.

n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'.

alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the final stage lasso coefficient
in the debiased lasso. Each such regression corresponds to the regression of one feature
on the remainder of the features.

n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.

max_iter : int, optional, default=1000
The maximum number of iterations in the Debiased Lasso

@@ -707,8 +719,12 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
def __init__(self,
model_y='auto', model_t='auto',
alpha='auto',
n_alphas=100,
alpha_cov='auto',
n_alphas_cov=10,
max_iter=1000,
tol=1e-4,
n_jobs=None,
featurizer=None,
fit_cate_intercept=True,
linear_first_stages=True,
@@ -718,9 +734,13 @@ def __init__(self,
random_state=None):
model_final = MultiOutputDebiasedLasso(
alpha=alpha,
n_alphas=n_alphas,
alpha_cov=alpha_cov,
n_alphas_cov=n_alphas_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol,
n_jobs=n_jobs,
random_state=random_state)
super().__init__(model_y=model_y,
model_t=model_t,
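For context, a minimal usage sketch of the new knobs on SparseLinearDML (not part of the diff; the data and the specific values passed are illustrative assumptions):

import numpy as np
from econml.dml import SparseLinearDML

# Toy data: 500 samples, 20 candidate features, binary treatment (illustrative only).
rng = np.random.RandomState(123)
X = rng.normal(size=(500, 20))
T = rng.binomial(1, 0.5, size=500)
y = X[:, 0] * T + rng.normal(size=500)

est = SparseLinearDML(alpha='auto',         # CV over the final-stage lasso penalty
                      n_alphas=50,          # size of the CV grid when alpha='auto'
                      alpha_cov='auto',     # penalty for the nodewise (Theta) regressions
                      n_alphas_cov=10,      # size of that grid when alpha_cov='auto'
                      n_jobs=-1,            # nodewise regressions now run in parallel via joblib
                      discrete_treatment=True,
                      random_state=123)
est.fit(y, T, X=X)
est.effect(X[:3])
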
36 changes: 28 additions & 8 deletions econml/drlearner.py
@@ -853,6 +853,18 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
CATE L1 regularization applied through the debiased lasso in the final model.
'auto' corresponds to a CV form of the :class:`DebiasedLasso`.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'.
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the final stage lasso coefficient
in the debiased lasso. Each such regression corresponds to the regression of one feature
on the remainder of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
max_iter : int, optional, default 1000
The maximum number of iterations in the Debiased Lasso
@@ -910,17 +922,17 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
est.fit(y, T, X=X, W=None)
>>> est.effect(X[:3])
array([ 0.418400..., 0.306400..., -0.130733...])
array([ 0.41..., 0.31..., -0.12...])
>>> est.effect_interval(X[:3])
(array([ 0.056783..., -0.206438..., -0.739296...]), array([0.780017..., 0.819239..., 0.477828...]))
(array([ 0.04..., -0.19..., -0.73...]), array([0.77..., 0.82..., 0.47...]))
>>> est.coef_(T=1)
array([0.449779..., 0.004807..., 0.061954...])
array([ 0.45..., -0.00..., 0.06...])
>>> est.coef__interval(T=1)
(array([ 0.242194... , -0.190825..., -0.139646...]), array([0.657365..., 0.200440..., 0.263556...]))
(array([ 0.24... , -0.19..., -0.13...]), array([0.65..., 0.19..., 0.26...]))
>>> est.intercept_(T=1)
0.88436847...
0.88...
>>> est.intercept__interval(T=1)
(0.68683788..., 1.08189907...)
(0.68..., 1.08...)
Attributes
----------
@@ -942,17 +954,25 @@ def __init__(self,
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
n_alphas=100,
alpha_cov='auto',
n_alphas_cov=10,
max_iter=1000,
tol=1e-4,
min_propensity=1e-6,
categories='auto',
n_splits=2, random_state=None):
n_splits=2,
random_state=None):
self.fit_cate_intercept = fit_cate_intercept
model_final = DebiasedLasso(
alpha=alpha,
n_alphas=n_alphas,
alpha_cov=alpha_cov,
n_alphas_cov=n_alphas_cov,
fit_intercept=fit_cate_intercept,
max_iter=max_iter,
tol=tol)
tol=tol,
random_state=random_state)
super().__init__(model_propensity=model_propensity,
model_regression=model_regression,
model_final=model_final,
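A similar hedged sketch for SparseLinearDRLearner, showing how the same arguments (and now random_state) are forwarded to the final-stage DebiasedLasso; the first-stage models and data are illustrative assumptions, not taken from the diff:

import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from econml.drlearner import SparseLinearDRLearner

rng = np.random.RandomState(123)
X = rng.normal(size=(500, 3))
T = rng.binomial(1, 0.5, size=500)
y = (1 + 0.5 * X[:, 0]) * T + X[:, 0] + rng.normal(size=500)

est = SparseLinearDRLearner(model_propensity=LogisticRegression(C=1000),
                            model_regression=LinearRegression(),
                            alpha='auto', n_alphas=100,          # final-stage lasso CV grid
                            alpha_cov='auto', n_alphas_cov=10,   # nodewise regression CV grid
                            random_state=123)                    # now also passed to DebiasedLasso
est.fit(y, T, X=X, W=None)
est.effect(X[:3])
est.coef_(T=1), est.coef__interval(T=1)
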
129 changes: 89 additions & 40 deletions econml/sklearn_extensions/linear_model.py
@@ -35,6 +35,7 @@
from statsmodels.tools.tools import add_constant
from statsmodels.api import RLM
import statsmodels
from joblib import Parallel, delayed


def _weighted_check_cv(cv=5, y=None, classifier=False):
@@ -539,6 +540,34 @@ def fit(self, X, y, sample_weight=None):
return self


def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, max_iter, tol, random_state):
n_samples, n_features = X.shape
y = X[:, i]
X_reduced = X[:, list(range(i)) + list(range(i + 1, n_features))]
# Call weighted lasso on reduced design matrix
if alpha_cov == 'auto':
local_wlasso = WeightedLassoCV(cv=3, n_alphas=n_alphas_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol, n_jobs=1,
random_state=random_state)
else:
local_wlasso = WeightedLasso(alpha=alpha_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol,
random_state=random_state)
local_wlasso.fit(X_reduced, y, sample_weight=sample_weight)
coefs = local_wlasso.coef_
# Weighted tau
if sample_weight is not None:
y_weighted = y * sample_weight / np.sum(sample_weight)
else:
y_weighted = y / n_samples
tausq = np.dot(y - local_wlasso.predict(X_reduced), y_weighted)
return coefs, tausq
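For readers following the math: each call to this helper runs one nodewise lasso (feature i regressed on the remaining features) and returns its coefficients plus the weighted residual moment tau_i^2. A rough illustrative sketch of how those pieces are assembled into the approximate precision matrix Theta_hat, mirroring the C_hat construction in _get_theta_hat below (the helper name assemble_theta_hat is hypothetical):

import numpy as np

def assemble_theta_hat(coefs, tausq):
    # coefs: (n_features, n_features - 1); row i holds the nodewise coefficients for feature i
    # tausq: (n_features,); weighted residual moments tau_i^2
    n_features = len(tausq)
    C_hat = np.eye(n_features)
    for i in range(n_features):
        C_hat[i, :i] = -coefs[i, :i]        # coefficients on features 0..i-1
        C_hat[i, i + 1:] = -coefs[i, i:]    # coefficients on features i+1..n_features-1
    # Theta_hat approximates the inverse covariance of X; it multiplies X.T @ residuals
    # in the debiasing correction of the lasso coefficients.
    return np.diag(1.0 / tausq) @ C_hat
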


class DebiasedLasso(WeightedLasso):
"""Debiased Lasso model.
@@ -555,6 +584,18 @@ class DebiasedLasso(WeightedLasso):
reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
Given this, you should use the :class:`.LinearRegression` object.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'.
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the lasso coefficient. Each
such regression corresponds to the regression of one feature on the remainder
of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
fit_intercept : boolean, optional, default True
Whether to calculate the intercept for this model. If set
to False, no intercept will be used in calculations
@@ -597,6 +638,9 @@ class DebiasedLasso(WeightedLasso):
(setting to 'random') often leads to significantly faster convergence
especially when tol is higher than 1e-4.
n_jobs : int or None, default None
How many jobs to use whenever parallelism is invoked
Attributes
----------
coef_ : array, shape (n_features,)
@@ -620,10 +664,14 @@ class DebiasedLasso(WeightedLasso):
"""

def __init__(self, alpha='auto', fit_intercept=True,
precompute=False, copy_X=True, max_iter=1000,
def __init__(self, alpha='auto', n_alphas=100, alpha_cov='auto', n_alphas_cov=10,
fit_intercept=True, precompute=False, copy_X=True, max_iter=1000,
tol=1e-4, warm_start=False,
random_state=None, selection='cyclic'):
random_state=None, selection='cyclic', n_jobs=None):
self.n_jobs = n_jobs
self.n_alphas = n_alphas
self.alpha_cov = alpha_cov
self.n_alphas_cov = n_alphas_cov
super().__init__(
alpha=alpha, fit_intercept=fit_intercept,
precompute=precompute, copy_X=copy_X,
@@ -747,18 +795,8 @@ def predict_interval(self, X, alpha=0.1):
lower = alpha / 2
upper = 1 - alpha / 2
y_pred = self.predict(X)
y_lower = np.empty(y_pred.shape)
y_upper = np.empty(y_pred.shape)
# Note that in the case of no intercept, X_offset is 0
if self.fit_intercept:
X = X - self._X_offset
# Calculate the variance of the predictions
var_pred = np.sum(np.matmul(X, self._coef_variance) * X, axis=1)
if self.fit_intercept:
var_pred += self._mean_error_variance

# Calculate prediction confidence intervals
sd_pred = np.sqrt(var_pred)
sd_pred = self.prediction_stderr(X)
y_lower = y_pred + \
np.apply_along_axis(lambda s: norm.ppf(
lower, scale=s), 0, sd_pred)
@@ -810,20 +848,25 @@ def intercept__interval(self, alpha=0.1):

def _get_coef_correction(self, X, y, y_pred, sample_weight, theta_hat):
# Assumes flattened y
n_samples, n_features = X.shape
n_samples, _ = X.shape
y_res = np.ndarray.flatten(y) - y_pred
# Compute weighted residuals
if sample_weight is not None:
y_res_scaled = y_res * sample_weight / np.sum(sample_weight)
else:
y_res_scaled = y_res / n_samples
delta_coef = np.matmul(
np.matmul(theta_hat, X.T), y_res_scaled)
theta_hat, np.matmul(X.T, y_res_scaled))
return delta_coef
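A side note on the reassociated matmul above (my reading of the change, not stated in the PR): with theta_hat of shape (d, d) and X of shape (n, d), grouping as theta_hat @ (X.T @ y_res_scaled) keeps every intermediate of length d, whereas the old grouping first materializes the (d, n) matrix theta_hat @ X.T. The result is identical up to floating point:

import numpy as np

n, d = 10_000, 200
theta_hat = np.random.randn(d, d)
X = np.random.randn(n, d)
y_res_scaled = np.random.randn(n) / n

old = np.matmul(np.matmul(theta_hat, X.T), y_res_scaled)  # (d, n) intermediate, O(n * d^2) flops
new = np.matmul(theta_hat, np.matmul(X.T, y_res_scaled))  # length-d intermediates, O(n * d + d^2) flops
assert np.allclose(old, new)
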

def _get_optimal_alpha(self, X, y, sample_weight):
# To be done once per target. Assumes y can be flattened.
cv_estimator = WeightedLassoCV(cv=5, fit_intercept=self.fit_intercept)
cv_estimator = WeightedLassoCV(cv=5, n_alphas=self.n_alphas, fit_intercept=self.fit_intercept,
precompute=self.precompute, copy_X=True,
max_iter=self.max_iter, tol=self.tol,
random_state=self.random_state,
selection=self.selection,
n_jobs=self.n_jobs)
cv_estimator.fit(X, y.flatten(), sample_weight=sample_weight)
return cv_estimator.alpha_

@@ -835,27 +878,15 @@ def _get_theta_hat(self, X, sample_weight):
C_hat = np.ones((1, 1))
tausq = (X.T @ X / n_samples).flatten()
return np.diag(1 / tausq) @ C_hat
coefs = np.empty((n_features, n_features - 1))
tausq = np.empty(n_features)
# Compute Lasso coefficients for the columns of the design matrix
for i in range(n_features):
y = X[:, i]
X_reduced = X[:, list(range(i)) + list(range(i + 1, n_features))]
# Call weighted lasso on reduced design matrix
# Inherit some parameters from the parent
local_wlasso = WeightedLasso(
alpha=self.alpha,
fit_intercept=False,
max_iter=self.max_iter,
tol=self.tol
).fit(X_reduced, y, sample_weight=sample_weight)
coefs[i] = local_wlasso.coef_
# Weighted tau
if sample_weight is not None:
y_weighted = y * sample_weight / np.sum(sample_weight)
else:
y_weighted = y / n_samples
tausq[i] = np.dot(y - local_wlasso.predict(X_reduced), y_weighted)
results = Parallel(n_jobs=self.n_jobs)(
delayed(_get_theta_coefs_and_tau_sq)(i, X, sample_weight,
self.alpha_cov, self.n_alphas_cov,
self.max_iter, self.tol, self.random_state)
for i in range(n_features))
coefs, tausq = zip(*results)
coefs = np.array(coefs)
tausq = np.array(tausq)
# Compute C_hat
C_hat = np.diag(np.ones(n_features))
C_hat[0][1:] = -coefs[0]
@@ -893,6 +924,18 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
Given this, you should use the :class:`LinearRegression` object.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'.
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the lasso coefficient. Each
such regression corresponds to the regression of one feature on the remainder
of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
fit_intercept : boolean, optional, default True
Whether to calculate the intercept for this model. If set
to False, no intercept will be used in calculations
@@ -935,6 +978,9 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
(setting to 'random') often leads to significantly faster convergence
especially when tol is higher than 1e-4.
n_jobs : int or None, default None
How many jobs to use whenever parallelism is invoked
Attributes
----------
coef_ : array, shape (n_targets, n_features) or (n_features,)
@@ -954,14 +1000,17 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
"""

def __init__(self, alpha='auto', fit_intercept=True,
def __init__(self, alpha='auto', n_alphas=100, alpha_cov='auto', n_alphas_cov=10,
fit_intercept=True,
precompute=False, copy_X=True, max_iter=1000,
tol=1e-4, warm_start=False,
random_state=None, selection='cyclic', n_jobs=None):
self.estimator = DebiasedLasso(alpha=alpha, fit_intercept=fit_intercept,
self.estimator = DebiasedLasso(alpha=alpha, n_alphas=n_alphas, alpha_cov=alpha_cov, n_alphas_cov=n_alphas_cov,
fit_intercept=fit_intercept,
precompute=precompute, copy_X=copy_X, max_iter=max_iter,
tol=tol, warm_start=warm_start,
random_state=random_state, selection=selection)
random_state=random_state, selection=selection,
n_jobs=n_jobs)
super().__init__(estimator=self.estimator, n_jobs=n_jobs)

def fit(self, X, y, sample_weight=None):
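Finally, a hedged sketch of exercising DebiasedLasso directly with the new arguments (synthetic data; the grid sizes and n_jobs value are illustrative):

import numpy as np
from econml.sklearn_extensions.linear_model import DebiasedLasso

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 30))
beta = np.zeros(30)
beta[:3] = [1.0, -0.5, 0.25]
y = X @ beta + rng.normal(scale=0.5, size=200)

dl = DebiasedLasso(alpha='auto', n_alphas=100,         # CV grid for the main lasso
                   alpha_cov='auto', n_alphas_cov=10,  # CV grid for each nodewise regression
                   n_jobs=-1,                          # nodewise fits now parallelized with joblib
                   random_state=0)
dl.fit(X, y)
dl.coef_[:3]
dl.predict_interval(X[:5], alpha=0.1)   # debiasing is what enables these intervals
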
9 changes: 5 additions & 4 deletions econml/tests/test_dml.py
@@ -967,9 +967,10 @@ def test_categories(self):
dmls = [LinearDML, SparseLinearDML]
for ctor in dmls:
dml1 = ctor(LinearRegression(), LogisticRegression(C=1000),
fit_cate_intercept=False, discrete_treatment=True)
fit_cate_intercept=False, discrete_treatment=True, random_state=123)
dml2 = ctor(LinearRegression(), LogisticRegression(C=1000),
fit_cate_intercept=False, discrete_treatment=True, categories=['c', 'b', 'a'])
fit_cate_intercept=False, discrete_treatment=True, categories=['c', 'b', 'a'],
random_state=123)

# create a simple artificial setup where effect of moving from treatment
# a -> b is 2,
@@ -1003,9 +1004,9 @@ def test_categories(self):
# but const_marginal_effect should be reordered based on the explicit cagetories
cme1 = dml1.const_marginal_effect(np.ones((1, 1))).reshape(-1)
cme2 = dml2.const_marginal_effect(np.ones((1, 1))).reshape(-1)
self.assertAlmostEqual(cme1[1], -cme2[1], places=4) # 1->3 in original ordering; 3->1 in new ordering
self.assertAlmostEqual(cme1[1], -cme2[1], places=3) # 1->3 in original ordering; 3->1 in new ordering
# 1-> 2 in original ordering; combination of 3->1 and 3->2
self.assertAlmostEqual(cme1[0], -cme2[1] + cme2[0], places=4)
self.assertAlmostEqual(cme1[0], -cme2[1] + cme2[0], places=3)

def test_groups(self):
groups = [1, 2, 3, 4, 5, 6] * 10