some small fixes to the debiased lasso (#358)
* some small fixes to the debiased lasso

* added parallelism across rows of the design matrix so that each LassoCV runs in parallel; added n_jobs param to the debiased lasso and to SparseLinearDML

* added n_jobs to MultiOutputDebiasedLasso

* added separate alpha options for the covariance matrix estimation

* added extra alpha options in SparseLinearDRLearner and SparseLinearDML
vsyrgkanis authored Jan 9, 2021
1 parent bb042d5 commit ce2f2b5
Showing 5 changed files with 218 additions and 128 deletions.
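For orientation, a minimal sketch of how the new options surface on DebiasedLasso after this commit. Everything below (data, values, variable names) is illustrative, not taken from the diff.

import numpy as np
from econml.sklearn_extensions.linear_model import DebiasedLasso

# Synthetic sparse regression problem, purely for illustration
rng = np.random.RandomState(123)
X = rng.normal(size=(500, 20))
y = X[:, 0] - 2 * X[:, 1] + rng.normal(scale=0.5, size=500)

est = DebiasedLasso(alpha='auto',       # CV over n_alphas candidates for the main lasso
                    n_alphas=50,
                    alpha_cov='auto',    # separate CV for the nodewise (covariance) lassos
                    n_alphas_cov=10,
                    n_jobs=-1)           # nodewise regressions run in parallel via joblib
est.fit(X, y)
point = est.predict(X[:3])
lower, upper = est.predict_interval(X[:3], alpha=0.1)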
20 changes: 20 additions & 0 deletions econml/dml/dml.py
@@ -653,6 +653,18 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
CATE L1 regularization applied through the debiased lasso in the final model.
'auto' corresponds to a CV form of the :class:`MultiOutputDebiasedLasso`.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the final stage lasso coefficient
in the debiased lasso. Each such regression corresponds to the regression of one feature
on the remainder of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
max_iter : int, optional, default=1000
The maximum number of iterations in the Debiased Lasso
@@ -707,8 +719,12 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML):
def __init__(self,
model_y='auto', model_t='auto',
alpha='auto',
n_alphas=100,
alpha_cov='auto',
n_alphas_cov=10,
max_iter=1000,
tol=1e-4,
n_jobs=None,
featurizer=None,
fit_cate_intercept=True,
linear_first_stages=True,
@@ -718,9 +734,13 @@ def __init__(self,
random_state=None):
model_final = MultiOutputDebiasedLasso(
alpha=alpha,
n_alphas=n_alphas,
alpha_cov=alpha_cov,
n_alphas_cov=n_alphas_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol,
n_jobs=n_jobs,
random_state=random_state)
super().__init__(model_y=model_y,
model_t=model_t,
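As a usage sketch (not part of the diff), the new arguments are simply forwarded from the CATE estimator to the final-stage MultiOutputDebiasedLasso; the data and model choices below are illustrative assumptions.

import numpy as np
from sklearn.linear_model import LassoCV
from econml.dml import SparseLinearDML

rng = np.random.RandomState(123)
X = rng.normal(size=(1000, 5))
W = rng.normal(size=(1000, 10))
T = X[:, 0] + rng.normal(size=1000)
y = T * (1 + X[:, 0]) + W[:, 0] + rng.normal(size=1000)

est = SparseLinearDML(model_y=LassoCV(), model_t=LassoCV(),
                      alpha='auto', n_alphas=100,
                      alpha_cov='auto', n_alphas_cov=10,
                      n_jobs=-1,            # passed down to the debiased lasso
                      random_state=123)
est.fit(y, T, X=X, W=W)
est.effect(X[:3])
est.effect_interval(X[:3], alpha=0.1)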
36 changes: 28 additions & 8 deletions econml/drlearner.py
@@ -853,6 +853,18 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
CATE L1 regularization applied through the debiased lasso in the final model.
'auto' corresponds to a CV form of the :class:`DebiasedLasso`.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the final stage lasso coefficient
in the debiased lasso. Each such regression corresponds to the regression of one feature
on the remainder of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
max_iter : int, optional, default 1000
The maximum number of iterations in the Debiased Lasso
@@ -910,17 +922,17 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):
est.fit(y, T, X=X, W=None)
>>> est.effect(X[:3])
array([ 0.418400..., 0.306400..., -0.130733...])
array([ 0.41..., 0.31..., -0.12...])
>>> est.effect_interval(X[:3])
(array([ 0.056783..., -0.206438..., -0.739296...]), array([0.780017..., 0.819239..., 0.477828...]))
(array([ 0.04..., -0.19..., -0.73...]), array([0.77..., 0.82..., 0.47...]))
>>> est.coef_(T=1)
array([0.449779..., 0.004807..., 0.061954...])
array([ 0.45..., -0.00..., 0.06...])
>>> est.coef__interval(T=1)
(array([ 0.242194... , -0.190825..., -0.139646...]), array([0.657365..., 0.200440..., 0.263556...]))
(array([ 0.24... , -0.19..., -0.13...]), array([0.65..., 0.19..., 0.26...]))
>>> est.intercept_(T=1)
0.88436847...
0.88...
>>> est.intercept__interval(T=1)
(0.68683788..., 1.08189907...)
(0.68..., 1.08...)
Attributes
----------
@@ -942,17 +954,25 @@ def __init__(self,
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
n_alphas=100,
alpha_cov='auto',
n_alphas_cov=10,
max_iter=1000,
tol=1e-4,
min_propensity=1e-6,
categories='auto',
n_splits=2, random_state=None):
n_splits=2,
random_state=None):
self.fit_cate_intercept = fit_cate_intercept
model_final = DebiasedLasso(
alpha=alpha,
n_alphas=n_alphas,
alpha_cov=alpha_cov,
n_alphas_cov=n_alphas_cov,
fit_intercept=fit_cate_intercept,
max_iter=max_iter,
tol=tol)
tol=tol,
random_state=random_state)
super().__init__(model_propensity=model_propensity,
model_regression=model_regression,
model_final=model_final,
129 changes: 89 additions & 40 deletions econml/sklearn_extensions/linear_model.py
@@ -35,6 +35,7 @@
from statsmodels.tools.tools import add_constant
from statsmodels.api import RLM
import statsmodels
from joblib import Parallel, delayed


def _weighted_check_cv(cv=5, y=None, classifier=False):
@@ -539,6 +540,34 @@ def fit(self, X, y, sample_weight=None):
return self


def _get_theta_coefs_and_tau_sq(i, X, sample_weight, alpha_cov, n_alphas_cov, max_iter, tol, random_state):
n_samples, n_features = X.shape
y = X[:, i]
X_reduced = X[:, list(range(i)) + list(range(i + 1, n_features))]
# Call weighted lasso on reduced design matrix
if alpha_cov == 'auto':
local_wlasso = WeightedLassoCV(cv=3, n_alphas=n_alphas_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol, n_jobs=1,
random_state=random_state)
else:
local_wlasso = WeightedLasso(alpha=alpha_cov,
fit_intercept=False,
max_iter=max_iter,
tol=tol,
random_state=random_state)
local_wlasso.fit(X_reduced, y, sample_weight=sample_weight)
coefs = local_wlasso.coef_
# Weighted tau
if sample_weight is not None:
y_weighted = y * sample_weight / np.sum(sample_weight)
else:
y_weighted = y / n_samples
tausq = np.dot(y - local_wlasso.predict(X_reduced), y_weighted)
return coefs, tausq
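For context, a brief summary of what this helper estimates (our notation, not text from the diff): it is the standard nodewise lasso used by the debiased lasso to build an approximate precision matrix. For each feature i it solves, roughly,

\[
\hat\gamma^{(i)} \in \arg\min_{\gamma} \frac{1}{2\sum_j w_j}\sum_j w_j\bigl(X_{j,i} - X_{j,-i}\gamma\bigr)^2 + \alpha_{\text{cov}}\lVert\gamma\rVert_1,
\qquad
\hat\tau_i^2 = \frac{1}{\sum_j w_j}\sum_j w_j\, X_{j,i}\bigl(X_{j,i} - X_{j,-i}\hat\gamma^{(i)}\bigr),
\]

where w_j are the sample weights (w_j = 1 when sample_weight is None) and, when alpha_cov='auto', the penalty level is chosen by WeightedLassoCV over n_alphas_cov candidates.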


class DebiasedLasso(WeightedLasso):
"""Debiased Lasso model.
@@ -555,6 +584,18 @@ class DebiasedLasso(WeightedLasso):
reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
Given this, you should use the :class:`.LinearRegression` object.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the lasso coefficient. Each
such regression corresponds to the regression of one feature on the remainder
of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
fit_intercept : boolean, optional, default True
Whether to calculate the intercept for this model. If set
to False, no intercept will be used in calculations
@@ -597,6 +638,9 @@ class DebiasedLasso(WeightedLasso):
(setting to 'random') often leads to significantly faster convergence
especially when tol is higher than 1e-4.
n_jobs : int or None, default None
How many jobs to use whenever parallelism is invoked
Attributes
----------
coef_ : array, shape (n_features,)
@@ -620,10 +664,14 @@ class DebiasedLasso(WeightedLasso):
"""

def __init__(self, alpha='auto', fit_intercept=True,
precompute=False, copy_X=True, max_iter=1000,
def __init__(self, alpha='auto', n_alphas=100, alpha_cov='auto', n_alphas_cov=10,
fit_intercept=True, precompute=False, copy_X=True, max_iter=1000,
tol=1e-4, warm_start=False,
random_state=None, selection='cyclic'):
random_state=None, selection='cyclic', n_jobs=None):
self.n_jobs = n_jobs
self.n_alphas = n_alphas
self.alpha_cov = alpha_cov
self.n_alphas_cov = n_alphas_cov
super().__init__(
alpha=alpha, fit_intercept=fit_intercept,
precompute=precompute, copy_X=copy_X,
@@ -747,18 +795,8 @@ def predict_interval(self, X, alpha=0.1):
lower = alpha / 2
upper = 1 - alpha / 2
y_pred = self.predict(X)
y_lower = np.empty(y_pred.shape)
y_upper = np.empty(y_pred.shape)
# Note that in the case of no intercept, X_offset is 0
if self.fit_intercept:
X = X - self._X_offset
# Calculate the variance of the predictions
var_pred = np.sum(np.matmul(X, self._coef_variance) * X, axis=1)
if self.fit_intercept:
var_pred += self._mean_error_variance

# Calculate prediction confidence intervals
sd_pred = np.sqrt(var_pred)
sd_pred = self.prediction_stderr(X)
y_lower = y_pred + \
np.apply_along_axis(lambda s: norm.ppf(
lower, scale=s), 0, sd_pred)
@@ -810,20 +848,25 @@ def intercept__interval(self, alpha=0.1):

def _get_coef_correction(self, X, y, y_pred, sample_weight, theta_hat):
# Assumes flattened y
n_samples, n_features = X.shape
n_samples, _ = X.shape
y_res = np.ndarray.flatten(y) - y_pred
# Compute weighted residuals
if sample_weight is not None:
y_res_scaled = y_res * sample_weight / np.sum(sample_weight)
else:
y_res_scaled = y_res / n_samples
delta_coef = np.matmul(
np.matmul(theta_hat, X.T), y_res_scaled)
theta_hat, np.matmul(X.T, y_res_scaled))
return delta_coef
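For reference (our summary, not part of the diff), this is the usual one-step debiasing correction: with \(\hat\Theta\) the approximate precision matrix from the nodewise regressions and residuals \(r = y - X\hat\beta^{\text{lasso}}\),

\[
\hat\beta^{\text{debiased}} = \hat\beta^{\text{lasso}} + \tfrac{1}{n}\,\hat\Theta\,X^{\top} r,
\]

with 1/n replaced by normalized sample weights in the weighted case. The reordering in this hunk, theta_hat @ (X.T @ y_res_scaled) instead of (theta_hat @ X.T) @ y_res_scaled, yields the same vector but avoids materializing an (n_features, n_samples) intermediate.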

def _get_optimal_alpha(self, X, y, sample_weight):
# To be done once per target. Assumes y can be flattened.
cv_estimator = WeightedLassoCV(cv=5, fit_intercept=self.fit_intercept)
cv_estimator = WeightedLassoCV(cv=5, n_alphas=self.n_alphas, fit_intercept=self.fit_intercept,
precompute=self.precompute, copy_X=True,
max_iter=self.max_iter, tol=self.tol,
random_state=self.random_state,
selection=self.selection,
n_jobs=self.n_jobs)
cv_estimator.fit(X, y.flatten(), sample_weight=sample_weight)
return cv_estimator.alpha_

@@ -835,27 +878,15 @@ def _get_theta_hat(self, X, sample_weight):
C_hat = np.ones((1, 1))
tausq = (X.T @ X / n_samples).flatten()
return np.diag(1 / tausq) @ C_hat
coefs = np.empty((n_features, n_features - 1))
tausq = np.empty(n_features)
# Compute Lasso coefficients for the columns of the design matrix
for i in range(n_features):
y = X[:, i]
X_reduced = X[:, list(range(i)) + list(range(i + 1, n_features))]
# Call weighted lasso on reduced design matrix
# Inherit some parameters from the parent
local_wlasso = WeightedLasso(
alpha=self.alpha,
fit_intercept=False,
max_iter=self.max_iter,
tol=self.tol
).fit(X_reduced, y, sample_weight=sample_weight)
coefs[i] = local_wlasso.coef_
# Weighted tau
if sample_weight is not None:
y_weighted = y * sample_weight / np.sum(sample_weight)
else:
y_weighted = y / n_samples
tausq[i] = np.dot(y - local_wlasso.predict(X_reduced), y_weighted)
results = Parallel(n_jobs=self.n_jobs)(
delayed(_get_theta_coefs_and_tau_sq)(i, X, sample_weight,
self.alpha_cov, self.n_alphas_cov,
self.max_iter, self.tol, self.random_state)
for i in range(n_features))
coefs, tausq = zip(*results)
coefs = np.array(coefs)
tausq = np.array(tausq)
# Compute C_hat
C_hat = np.diag(np.ones(n_features))
C_hat[0][1:] = -coefs[0]
@@ -893,6 +924,18 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.
Given this, you should use the :class:`LinearRegression` object.
n_alphas : int, optional, default 100
How many alphas to try if alpha='auto'
alpha_cov : string | float, optional, default 'auto'
The regularization alpha that is used when constructing the pseudo inverse of
the covariance matrix Theta used for correcting the lasso coefficient. Each
such regression corresponds to the regression of one feature on the remainder
of the features.
n_alphas_cov : int, optional, default 10
How many alpha_cov to try if alpha_cov='auto'.
fit_intercept : boolean, optional, default True
Whether to calculate the intercept for this model. If set
to False, no intercept will be used in calculations
@@ -935,6 +978,9 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
(setting to 'random') often leads to significantly faster convergence
especially when tol is higher than 1e-4.
n_jobs : int or None, default None
How many jobs to use whenever parallelism is invoked
Attributes
----------
coef_ : array, shape (n_targets, n_features) or (n_features,)
@@ -954,14 +1000,17 @@ class MultiOutputDebiasedLasso(MultiOutputRegressor):
"""

def __init__(self, alpha='auto', fit_intercept=True,
def __init__(self, alpha='auto', n_alphas=100, alpha_cov='auto', n_alphas_cov=10,
fit_intercept=True,
precompute=False, copy_X=True, max_iter=1000,
tol=1e-4, warm_start=False,
random_state=None, selection='cyclic', n_jobs=None):
self.estimator = DebiasedLasso(alpha=alpha, fit_intercept=fit_intercept,
self.estimator = DebiasedLasso(alpha=alpha, n_alphas=n_alphas, alpha_cov=alpha_cov, n_alphas_cov=n_alphas_cov,
fit_intercept=fit_intercept,
precompute=precompute, copy_X=copy_X, max_iter=max_iter,
tol=tol, warm_start=warm_start,
random_state=random_state, selection=selection)
random_state=random_state, selection=selection,
n_jobs=n_jobs)
super().__init__(estimator=self.estimator, n_jobs=n_jobs)

def fit(self, X, y, sample_weight=None):
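A short usage sketch for the multi-output wrapper (illustrative, not from the diff). Note that n_jobs is forwarded both to MultiOutputRegressor (one job per target) and to each underlying DebiasedLasso (nodewise regressions), per the constructor above.

import numpy as np
from econml.sklearn_extensions.linear_model import MultiOutputDebiasedLasso

rng = np.random.RandomState(0)
X = rng.normal(size=(300, 10))
Y = np.column_stack([X[:, 0] + rng.normal(size=300),
                     2 * X[:, 1] + rng.normal(size=300)])

est = MultiOutputDebiasedLasso(alpha='auto', n_alphas=50,
                               alpha_cov='auto', n_alphas_cov=10,
                               n_jobs=2)
est.fit(X, Y)
est.coef_           # shape (n_targets, n_features)
est.predict(X[:5])  # shape (5, n_targets)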
9 changes: 5 additions & 4 deletions econml/tests/test_dml.py
@@ -967,9 +967,10 @@ def test_categories(self):
dmls = [LinearDML, SparseLinearDML]
for ctor in dmls:
dml1 = ctor(LinearRegression(), LogisticRegression(C=1000),
fit_cate_intercept=False, discrete_treatment=True)
fit_cate_intercept=False, discrete_treatment=True, random_state=123)
dml2 = ctor(LinearRegression(), LogisticRegression(C=1000),
fit_cate_intercept=False, discrete_treatment=True, categories=['c', 'b', 'a'])
fit_cate_intercept=False, discrete_treatment=True, categories=['c', 'b', 'a'],
random_state=123)

# create a simple artificial setup where effect of moving from treatment
# a -> b is 2,
@@ -1003,9 +1004,9 @@ def test_categories(self):
# but const_marginal_effect should be reordered based on the explicit cagetories
cme1 = dml1.const_marginal_effect(np.ones((1, 1))).reshape(-1)
cme2 = dml2.const_marginal_effect(np.ones((1, 1))).reshape(-1)
self.assertAlmostEqual(cme1[1], -cme2[1], places=4) # 1->3 in original ordering; 3->1 in new ordering
self.assertAlmostEqual(cme1[1], -cme2[1], places=3) # 1->3 in original ordering; 3->1 in new ordering
# 1-> 2 in original ordering; combination of 3->1 and 3->2
self.assertAlmostEqual(cme1[0], -cme2[1] + cme2[0], places=4)
self.assertAlmostEqual(cme1[0], -cme2[1] + cme2[0], places=3)

def test_groups(self):
groups = [1, 2, 3, 4, 5, 6] * 10