Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
102 commits
Select commit Hold shift + click to select a range
1214a5f
reorder init steps irm for readability
SvenKlaassen Jun 18, 2024
86cd871
first version of discrete treatment dataset
SvenKlaassen Jun 18, 2024
8728413
first apo model
SvenKlaassen Jun 18, 2024
545de58
Create test_apo_exceptions.py
SvenKlaassen Jun 18, 2024
4eaaa52
update irm sensitivity atte
SvenKlaassen Jun 18, 2024
825aa0c
Update _utils_irm_manual.py
SvenKlaassen Jun 18, 2024
7c3f1c1
add estimation and sensitivity to apo model
SvenKlaassen Jun 18, 2024
44d3100
rename estimation
SvenKlaassen Jun 18, 2024
592e642
Create _utils_apo_manual.py
SvenKlaassen Jun 18, 2024
fbc9e71
first manual apo test
SvenKlaassen Jun 18, 2024
fab0ae2
add external prediction test and se unit test
SvenKlaassen Jun 20, 2024
b814fce
add sensitivity and bootstrap test for apo
SvenKlaassen Jun 20, 2024
118f42a
add test for external predictions in apo
SvenKlaassen Jun 20, 2024
f184560
Update test_apo_external_predictions.py
SvenKlaassen Jun 20, 2024
827345b
add classifier unit test for apo
SvenKlaassen Jun 20, 2024
66f7df0
add tune with unit test for apo
SvenKlaassen Jun 20, 2024
a43ab19
Create test_apo_weighted_scores.py
SvenKlaassen Jun 20, 2024
aed2227
Update test_apo_weighted_scores.py
SvenKlaassen Jun 20, 2024
8f15923
adding capo and gapo to apo class
SvenKlaassen Jun 20, 2024
134fc4c
small fixes to remove unnecessary lines
SvenKlaassen Jun 20, 2024
0d5ecd5
update dgp
SvenKlaassen Jun 21, 2024
b86f8aa
fix typo
SvenKlaassen Jun 21, 2024
70b1b00
remove seed from dgp
SvenKlaassen Jun 21, 2024
5be62d6
Add basic unit tests for dgp
SvenKlaassen Jun 21, 2024
2a1903f
add docstring for APO model
SvenKlaassen Jun 21, 2024
87cfa76
add warning for low percentange of treatment level
SvenKlaassen Jun 21, 2024
5c0501c
remove double check
SvenKlaassen Jun 21, 2024
1506474
remove self_i_quant from qte
SvenKlaassen Jun 21, 2024
64a0e20
add first apos model
SvenKlaassen Jun 21, 2024
f849bd0
update irm dgp
SvenKlaassen Jul 15, 2024
ec24668
Update apo.py
SvenKlaassen Jul 15, 2024
58f2b39
Update apos.py
SvenKlaassen Jul 15, 2024
9ab05aa
update set sample splitting documentation
SvenKlaassen Jul 17, 2024
b3f4f77
add set_sample_slit to apos.py
SvenKlaassen Jul 17, 2024
d2ab512
create manual apos version and basic unit test
SvenKlaassen Jul 17, 2024
e6d680c
Update _utils_apos_manual.py
SvenKlaassen Jul 17, 2024
c388326
Merge branch 's-set-sample-splitting' into s-add-irm-apo
SvenKlaassen Jul 22, 2024
2adc6d4
update set_sample_splitting in apos.py
SvenKlaassen Jul 22, 2024
29f67c4
create manual confint version for qte and apos
SvenKlaassen Jul 22, 2024
d488dd5
add boostrap() to apos
SvenKlaassen Jul 22, 2024
3a41529
add generate summary to utils
SvenKlaassen Jul 22, 2024
3bde8fd
add summary and properties to apos.py
SvenKlaassen Jul 22, 2024
cb6ee7f
Update test_apos.py
SvenKlaassen Jul 22, 2024
72ad859
Update test_apos.py
SvenKlaassen Jul 22, 2024
c82ca7e
Create test_apos_weighted_scores.py
SvenKlaassen Jul 22, 2024
5d07fe6
Create test_apos_classfier.py
SvenKlaassen Jul 22, 2024
402a30b
add treatment_levels property
SvenKlaassen Jul 22, 2024
25e710b
add simple average treatment effects
SvenKlaassen Jul 22, 2024
660fdce
add optional treatment_names to framework
SvenKlaassen Jul 23, 2024
02388d6
fix dimensions in docstrings
SvenKlaassen Jul 23, 2024
a17644f
add setter for treatment_names in framework
SvenKlaassen Jul 23, 2024
2cea2ce
rename to causal_contrast
SvenKlaassen Jul 23, 2024
739641e
update treatment_levels to allow for iterable objects
SvenKlaassen Jul 23, 2024
310d532
add causal_contrasts to apos with unit tests
SvenKlaassen Jul 23, 2024
3a73ac9
update scaling for apos dgp
SvenKlaassen Jul 24, 2024
879cfb0
reduce irm settings for unit tests
SvenKlaassen Jul 25, 2024
aaa581e
extend weight tests for apo
SvenKlaassen Jul 25, 2024
9cbc9b3
add pytest mark.ci to weight test
SvenKlaassen Jul 25, 2024
e076f97
extend weight tests for apos
SvenKlaassen Jul 25, 2024
64de600
extend apos weights test
SvenKlaassen Jul 25, 2024
c4f6a05
remove apply_cross_fitting from apo_manual
SvenKlaassen Jul 25, 2024
705afb8
add test for classifier without binary outcome in apo
SvenKlaassen Jul 25, 2024
8b31815
Add exception for classfier in DoubleMLAPOS class
SvenKlaassen Jul 25, 2024
9d9b7fd
Update test_apos_exceptions.py
SvenKlaassen Jul 25, 2024
0daf406
add seperate function for binary outcome check
SvenKlaassen Jul 25, 2024
b9d8ff6
update binary outcome check iivm
SvenKlaassen Jul 25, 2024
ffa77cf
update binary treatment check plr
SvenKlaassen Jul 25, 2024
d66658c
Update plr.py
SvenKlaassen Jul 25, 2024
50d8b2d
update binary outcome check apo
SvenKlaassen Jul 25, 2024
0bfda41
adjust check data for APO
SvenKlaassen Jul 25, 2024
4189df5
add apo and gapo exception tests
SvenKlaassen Jul 25, 2024
73a164f
add methods exception tests for apos
SvenKlaassen Jul 25, 2024
03d8636
add property tests for DoubleMLAPOS
SvenKlaassen Jul 25, 2024
039901d
fix exception test apos
SvenKlaassen Jul 26, 2024
953cbef
add exception for framework with sensitivity analysis
SvenKlaassen Jul 26, 2024
a367b1e
update default test for doubleml
SvenKlaassen Jul 26, 2024
87f6acc
extend model default tests for apo and apos
SvenKlaassen Jul 27, 2024
1628001
fix model default test
SvenKlaassen Jul 28, 2024
7b66663
Merge branch 's-add-sensitivity-framework' into s-add-irm-apo
SvenKlaassen Jul 30, 2024
f24d13a
add sensitivity_elements property to apos
SvenKlaassen Jul 31, 2024
ac38ba7
add sensitivity_params and sensitivity_analysis to apos
SvenKlaassen Jul 31, 2024
8f323a5
add sensitivity_plot to apos
SvenKlaassen Jul 31, 2024
706004c
Update test_return_types.py
SvenKlaassen Jul 31, 2024
fb2dfa6
fix format
SvenKlaassen Jul 31, 2024
f13ba78
add sensitivity_summary to framework obj
SvenKlaassen Jul 31, 2024
e0715c8
move sensitivity_summary to DoubleMLFramework class
SvenKlaassen Jul 31, 2024
04fae5e
fix gain statistics for multiple treatments
SvenKlaassen Jul 31, 2024
53b4e4a
Update gain_statistics.py
SvenKlaassen Jul 31, 2024
9911b69
add benchmarking to apos
SvenKlaassen Jul 31, 2024
31b386a
add _all_treatments to apos
SvenKlaassen Jul 31, 2024
738edf3
add exception tests for external predicitons
SvenKlaassen Jul 31, 2024
e18aba1
add exception for multiple treatment variables in apo setting
SvenKlaassen Jul 31, 2024
bfa57da
add simple external predictions to apos
SvenKlaassen Jul 31, 2024
6f8d3f4
fix sensitivity_summary tests
SvenKlaassen Jul 31, 2024
e25663c
add more restrictions on ext predictions for apos
SvenKlaassen Aug 1, 2024
07e919a
finalize external predictions (docstrings and method names
SvenKlaassen Aug 1, 2024
7b8b330
add evaluations for external predictions in DoubleMLAPO
SvenKlaassen Aug 1, 2024
ae5b2c0
fix dimensions in gain_statistics
SvenKlaassen Aug 1, 2024
fef6987
fix formatting
SvenKlaassen Aug 1, 2024
5200ce2
update make_irm_data_discrete_treatments descriptions
SvenKlaassen Aug 1, 2024
0eb95ef
Merge branch 's-add-sensitivity-framework' into s-add-irm-apo
SvenKlaassen Aug 9, 2024
56ce8cc
Merge branch 'main' into s-add-irm-apo
SvenKlaassen Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions doubleml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from .plm.plr import DoubleMLPLR
from .plm.pliv import DoubleMLPLIV
from .irm.irm import DoubleMLIRM
from .irm.apo import DoubleMLAPO
from .irm.apos import DoubleMLAPOS
from .irm.iivm import DoubleMLIIVM
from .double_ml_data import DoubleMLData, DoubleMLClusterData
from .did.did import DoubleMLDID
Expand All @@ -18,22 +20,26 @@
from .utils.blp import DoubleMLBLP
from .utils.policytree import DoubleMLPolicyTree

__all__ = ['concat',
'DoubleMLFramework',
'DoubleMLPLR',
'DoubleMLPLIV',
'DoubleMLIRM',
'DoubleMLIIVM',
'DoubleMLData',
'DoubleMLClusterData',
'DoubleMLDID',
'DoubleMLDIDCS',
'DoubleMLPQ',
'DoubleMLQTE',
'DoubleMLLPQ',
'DoubleMLCVAR',
'DoubleMLBLP',
'DoubleMLPolicyTree',
'DoubleMLSSM']
# Names exported by ``from doubleml import *``; each entry is imported above in this module.
# ``DoubleMLAPO`` and ``DoubleMLAPOS`` come from ``.irm.apo`` and ``.irm.apos``.
__all__ = [
'concat',
'DoubleMLFramework',
'DoubleMLPLR',
'DoubleMLPLIV',
'DoubleMLIRM',
'DoubleMLAPO',
'DoubleMLAPOS',
'DoubleMLIIVM',
'DoubleMLData',
'DoubleMLClusterData',
'DoubleMLDID',
'DoubleMLDIDCS',
'DoubleMLPQ',
'DoubleMLQTE',
'DoubleMLLPQ',
'DoubleMLCVAR',
'DoubleMLBLP',
'DoubleMLPolicyTree',
'DoubleMLSSM'
]

__version__ = importlib.metadata.version('doubleml')
159 changes: 159 additions & 0 deletions doubleml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1485,3 +1485,162 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM
return DoubleMLData(data, 'y', 'd', x_cols, 'z', None, 's')
else:
raise ValueError('Invalid return_type.')


def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs):
    """
    Generates data from an interactive regression (IRM) model with multiple treatment levels (based on an
    underlying continuous treatment).

    The data generating process is defined as follows (similar to the Monte Carlo simulation used
    in Sant'Anna and Zhao (2020)).

    Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where
    :math:`\\Sigma_{kj} = c^{|j-k|}`. With the default :math:`c=0`, :math:`\\Sigma` corresponds
    to the identity matrix.
    Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`
    (standardized with the sample mean and sample standard deviation), where

    .. math::

        \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1)

        \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1))

        \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3

        \\tilde{Z}_4 &= (20 + X_2 + X_4)^2

        \\tilde{Z}_5 &= X_5.

    A continuous treatment :math:`D_{\\text{cont}}` is generated as

    .. math::

        D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D,

    where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment
    effect is defined as

    .. math::

        \\theta(d) = \\frac{15}{1 + \\exp(-d - 1.2 \\cos(d))} - 2.

    Based on the continuous treatment, a discrete treatment :math:`D` is generated with a baseline level of
    :math:`D=0` and additional levels :math:`1, \\dots, n_{\\text{levels}}` based on the quantiles of
    :math:`D_{\\text{cont}}`: the sample of :math:`D_{\\text{cont}}` is split into :math:`n_{\\text{levels}}`
    bins of equal empirical probability, and the bin index is the potential level of an observation.
    Independently of everything else, each observation is assigned to the baseline level :math:`D=0` with
    probability :math:`1/n_{\\text{levels}}`; otherwise it receives its potential level.

    The potential outcomes are defined as

    .. math::

        Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y

        Y(1) &= \\theta(D_{\\text{cont}}) + Y(0),

    where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as

    .. math::

        Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}.

    If ``linear=True``, the raw covariates :math:`X_j` are used instead of the transformed covariates
    :math:`Z_j` in the equations for :math:`D_{\\text{cont}}` and :math:`Y(0)`.

    The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``.

    Parameters
    ----------
    n_obs : int
        The number of observations to simulate.
        Default is ``200``.

    n_levels : int
        The number of treatment levels (in addition to the baseline level ``0``).
        Has to be at least ``2``.
        Default is ``3``.

    linear : bool
        Indicates whether the true underlying regression is linear.
        Default is ``False``.

    random_state : int or None
        Random seed for reproducibility. If not ``None``, the global NumPy random generator is seeded
        via ``np.random.seed(random_state)``.
        Default is ``None``.

    **kwargs
        Additional keyword arguments to set non-default values for the parameters
        :math:`\\xi=0.3` (``xi``), :math:`c=0.0` (``c``) and the number of covariates (``dim_x``, default ``5``).

    Returns
    -------
    res_dict : dictionary
        Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. The entry ``oracle_values`` is a
        dictionary with the entries ``cont_d``, ``level_bounds``, ``potential_level``, ``ite`` and ``y0``.

    Raises
    ------
    ValueError
        If ``n_levels`` is not an integer or is smaller than ``2``.

    """
    if random_state is not None:
        # All draws below use the global legacy generator, so seeding it makes the output reproducible.
        np.random.seed(random_state)
    xi = kwargs.get('xi', 0.3)
    c = kwargs.get('c', 0.0)
    dim_x = kwargs.get('dim_x', 5)

    # NumPy integer scalars (e.g. from np.arange) are accepted in addition to builtin ints.
    if not isinstance(n_levels, (int, np.integer)):
        raise ValueError('n_levels must be an integer.')
    if n_levels < 2:
        raise ValueError('n_levels must be at least 2.')

    # observed covariates
    cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)])
    x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])

    def f_reg(w):
        # baseline outcome regression
        return 210 + 27.4*w[:, 0] + 13.7*(w[:, 1] + w[:, 2] + w[:, 3])

    def f_treatment(w, xi):
        # conditional mean of the continuous treatment
        return xi * (-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3])

    def treatment_effect(d, scale=15):
        # scaled logistic curve in d + 1.2*cos(d), shifted down by 2
        return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2

    # nonlinear transformations of the covariates, standardized column-wise
    z_tilde_1 = np.exp(0.5 * x[:, 0])
    z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0]))
    z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3
    z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2

    z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:]))
    z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0)

    # error terms (the order of the random draws is part of the reproducible output)
    var_eps_y = 5
    eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs)
    var_eps_d = 1
    eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs)

    # the linear design uses the raw covariates, the nonlinear design the transformed ones
    features = x if linear else z
    g = f_reg(features)
    m = f_treatment(features, xi)

    cont_d = m + eps_d
    # n_levels bins of equal empirical mass; only the interior bounds separate the levels
    level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1))
    interior_bounds = level_bounds[1:-1]
    # potential level in {1, ..., n_levels}: one plus the number of interior bounds not exceeding cont_d
    potential_level = (cont_d[:, np.newaxis] >= interior_bounds).sum(axis=1) + 1.0
    # with probability 1/n_levels an observation is assigned to the baseline level 0
    eta = np.random.uniform(0, 1, size=n_obs)
    d = np.where(eta >= 1/n_levels, potential_level, 0.0)

    ite = treatment_effect(cont_d)
    y0 = g + eps_y
    # only treated for d > 0 compared to the baseline
    y = ite * (d > 0) + y0

    oracle_values = {
        'cont_d': cont_d,
        'level_bounds': level_bounds,
        'potential_level': potential_level,
        'ite': ite,
        'y0': y0,
    }

    res_dict = {
        'x': x,
        'y': y,
        'd': d,
        'oracle_values': oracle_values
    }

    return res_dict
43 changes: 6 additions & 37 deletions doubleml/double_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,8 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
>>> ml_m = learner
>>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5)
>>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
>>> # simple sample splitting with two folds and without cross-fitting
>>> smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
>>> dml_plr_obj.set_sample_splitting(smpls)
>>> # sample splitting with two folds and cross-fitting
>>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
Expand Down Expand Up @@ -1434,44 +1436,11 @@ def sensitivity_summary(self):
res : str
Summary for the sensitivity analysis.
"""
header = '================== Sensitivity Analysis ==================\n'
if self.sensitivity_params is None:
res = header + 'Apply sensitivity_analysis() to generate sensitivity_summary.'
if self._framework is None:
raise ValueError('Apply sensitivity_analysis() before sensitivity_summary.')
else:
sig_level = f'Significance Level: level={self.sensitivity_params["input"]["level"]}\n'
scenario_params = f'Sensitivity parameters: cf_y={self.sensitivity_params["input"]["cf_y"]}; ' \
f'cf_d={self.sensitivity_params["input"]["cf_d"]}, ' \
f'rho={self.sensitivity_params["input"]["rho"]}'

theta_and_ci_col_names = ['CI lower', 'theta lower', ' theta', 'theta upper', 'CI upper']
theta_and_ci = np.transpose(np.vstack((self.sensitivity_params['ci']['lower'],
self.sensitivity_params['theta']['lower'],
self.coef,
self.sensitivity_params['theta']['upper'],
self.sensitivity_params['ci']['upper'])))
df_theta_and_ci = pd.DataFrame(theta_and_ci,
columns=theta_and_ci_col_names,
index=self._dml_data.d_cols)
theta_and_ci_summary = str(df_theta_and_ci)

rvs_col_names = ['H_0', 'RV (%)', 'RVa (%)']
rvs = np.transpose(np.vstack((self.sensitivity_params['rv'],
self.sensitivity_params['rva']))) * 100

df_rvs = pd.DataFrame(np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)),
columns=rvs_col_names,
index=self._dml_data.d_cols)
rvs_summary = str(df_rvs)

res = header + \
'\n------------------ Scenario ------------------\n' + \
sig_level + scenario_params + '\n' + \
'\n------------------ Bounds with CI ------------------\n' + \
theta_and_ci_summary + '\n' + \
'\n------------------ Robustness Values ------------------\n' + \
rvs_summary

return res
sensitivity_summary = self._framework.sensitivity_summary
return sensitivity_summary

def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0,
include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100):
Expand Down
Loading