From 1214a5fe59af43ba23346d19a39de12f62ef0917 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Tue, 18 Jun 2024 15:39:05 +0200
Subject: [PATCH 01/98] reorder init steps irm for readability

---
 doubleml/irm/irm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 82d22d1dd..eface6917 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -143,7 +143,6 @@ def __init__(self,
         ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
         _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
         self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
-        self._normalize_ipw = normalize_ipw
         if ml_g_is_classifier:
             if obj_dml_data.binary_outcome:
                 self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
@@ -154,6 +153,7 @@ def __init__(self,
             self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
         self._initialize_ml_nuisance_params()
 
+        self._normalize_ipw = normalize_ipw
         if not isinstance(self.normalize_ipw, bool):
             raise TypeError('Normalization indicator has to be boolean. ' +
                             f'Object of type {str(type(self.normalize_ipw))} passed.')

From 86cd871d6e0d6bfbcde6e4578ab0b4bc1144492a Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Tue, 18 Jun 2024 16:17:46 +0200
Subject: [PATCH 02/98] first version of discrete treatment dataset

---
 doubleml/datasets.py | 60 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/doubleml/datasets.py b/doubleml/datasets.py
index b510f4b6d..cfbebdd99 100644
--- a/doubleml/datasets.py
+++ b/doubleml/datasets.py
@@ -1433,3 +1433,63 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM
         return DoubleMLData(data, 'y', 'd', x_cols, 'z', None, 's')
     else:
         raise ValueError('Invalid return_type.')
+
+
+def make_irm_data_discrete_treatements(n_obs=200, p=10, support_size=5, n_levels=3, random_state=42):
+    """
+    Generates data from an interactive regression (IRM) model with multiple treatment levels.
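+
+    Parameters
+    ----------
+    n_obs : int
+        The number of observations to simulate. Default is ``200``.
+    p : int
+        The number of covariates. Default is ``10``.
+    support_size : int
+        The number of covariates with nonzero coefficients in the outcome and treatment equations. Default is ``5``.
+    n_levels : int
+        The number of treatment levels. Default is ``3``.
+    random_state : int
+        Seed passed to ``np.random.seed``. Default is ``42``.
+
+    Returns
+    -------
+    result_dict : dict
+        Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``.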
+ """ + + np.random.seed(random_state) + + # define continous treatment effect + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) + + # Outcome support and coefficients + support_y = np.random.choice(np.arange(p), size=support_size, replace=False) + coefs_y = np.random.uniform(0, 1, size=support_size) + # treatment support and coefficients + support_d = support_y + range_coefs_d = [0.2, 0.3] + coefs_d = np.random.uniform(range_coefs_d[0], range_coefs_d[1], size=support_size) + + # noise + epsilon = np.random.uniform(-1, 1, size=n_obs) + + # Generate controls, covariates, treatments and outcomes + x = np.random.uniform(0, 1, size=(n_obs, p)) + # Heterogeneous treatment effects + te = treatment_effect(x) + + # set d to be a discrete number of levels + range_cont_d = support_size * range_coefs_d + # devide the range into n_levels + levels = np.linspace(range_cont_d[0], range_cont_d[1], n_levels - 1) + + # define a discrete treatment version (with a baseline probability) + eta = np.random.uniform(0, 1, size=n_obs) + potential_level = sum([1.0 * (np.dot(x[:, support_d], coefs_d) >= level) for level in levels]) + 1 + d = 1.0 * (eta >= 1/n_levels) * potential_level + + # only treated for d > 0 compared to the baseline + y = te * (d > 0) + np.dot(x[:, support_y], coefs_y) + epsilon + + oracle_values = { + 'levels': levels, + 'support_y': support_y, + 'coefs_y': coefs_y, + 'support_d': support_d, + 'coefs_d': coefs_d, + 'te': te, + 'treatment_effect': treatment_effect + } + + resul_dict = { + 'x': x, + 'y': y, + 'd': d, + 'oracle_values': oracle_values + } + + return resul_dict From 8728413fe54211947523cb0850defc1ecdfd0128 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:18:03 +0200 Subject: [PATCH 03/98] first apo model --- doubleml/__init__.py | 2 + doubleml/irm/apo.py | 157 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 doubleml/irm/apo.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 700991c79..69e064a00 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -5,6 +5,7 @@ from .plm.plr import DoubleMLPLR from .plm.pliv import DoubleMLPLIV from .irm.irm import DoubleMLIRM +from .irm.apo import DoubleMLAPO from .irm.iivm import DoubleMLIIVM from .double_ml_data import DoubleMLData, DoubleMLClusterData from .did.did import DoubleMLDID @@ -23,6 +24,7 @@ 'DoubleMLPLR', 'DoubleMLPLIV', 'DoubleMLIRM', + 'DoubleMLAPO', 'DoubleMLIIVM', 'DoubleMLData', 'DoubleMLClusterData', diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py new file mode 100644 index 000000000..ce1dc3ff3 --- /dev/null +++ b/doubleml/irm/apo.py @@ -0,0 +1,157 @@ +import numpy as np + +from ..double_ml import DoubleML + +from ..double_ml_score_mixins import LinearScoreMixin +from ..double_ml_data import DoubleMLData + +from ..utils._checks import _check_score, _check_trimming, _check_weights + + +class DoubleMLAPO(LinearScoreMixin, DoubleML): + """Double machine learning average potential outcomes for interactive regression models + + Parameters + """ + def __init__(self, + obj_dml_data, + ml_g, + ml_m, + treatment_level, + n_folds=5, + n_rep=1, + score='APO', + weights=None, + normalize_ipw=False, + trimming_rule='truncate', + trimming_threshold=1e-2, + draw_sample_splitting=True): + super().__init__(obj_dml_data, + n_folds, + n_rep, + score, + draw_sample_splitting) + + # set up treatment level and check data + self._treatment_level = treatment_level + 
+        self._treated = self._dml_data.d == self._treatment_level
+
+        self._check_data(self._dml_data)
+        valid_scores = ['APO']
+        _check_score(self.score, valid_scores, allow_callable=False)
+
+        # set stratification for resampling
+        self._strata = self._dml_data.d
+        if draw_sample_splitting:
+            self.draw_sample_splitting()
+
+        ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
+        _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
+        self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+        self._normalize_ipw = normalize_ipw
+        if ml_g_is_classifier:
+            if obj_dml_data.binary_outcome:
+                self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+            else:
+                raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
+                                 'but the outcome variable is not binary with values 0 and 1.')
+        else:
+            self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+        self._initialize_ml_nuisance_params()
+
+        self._normalize_ipw = normalize_ipw
+        if not isinstance(self.normalize_ipw, bool):
+            raise TypeError('Normalization indicator has to be boolean. ' +
+                            f'Object of type {str(type(self.normalize_ipw))} passed.')
+        self._trimming_rule = trimming_rule
+        self._trimming_threshold = trimming_threshold
+        _check_trimming(self._trimming_rule, self._trimming_threshold)
+
+        self._sensitivity_implemented = True
+        self._external_predictions_implemented = True
+
+        # ATE weights are the standard case
+        _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep)
+        self._initialize_weights(weights)
+
+        return self
+
+    @property
+    def treatment_level(self):
+        """
+        Chosen treatment level for average potential outcomes.
+        """
+        return self._treatment_level
+
+    @property
+    def treated(self):
+        """
+        Indicator for treated observations (with the corresponding treatment level).
+        """
+        return self._treated
+
+    @property
+    def normalize_ipw(self):
+        """
+        Indicates whether the inverse probability weights are normalized.
+        """
+        return self._normalize_ipw
+
+    @property
+    def trimming_rule(self):
+        """
+        Specifies the used trimming rule.
+        """
+        return self._trimming_rule
+
+    @property
+    def trimming_threshold(self):
+        """
+        Specifies the used trimming threshold.
+        """
+        return self._trimming_threshold
+
+    @property
+    def weights(self):
+        """
+        Specifies the weights for a weighted average potential outcome.
+        """
+        return self._weights
+
+    def _initialize_ml_nuisance_params(self):
+        valid_learner = ['ml_g', 'ml_m']
+        self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
+                        for learner in valid_learner}
+
+    def _nuisance_est(self):
+        # Estimate nuisance parameters
+        # This is a placeholder for the estimation logic
+        print("Estimating nuisance parameters...")
+
+    def _nuisance_tuning(self):
+        # Tune nuisance parameters
+        # This is a placeholder for tuning logic
+        print("Tuning nuisance parameters...")
+
+    def _sensitivity_element_est(self):
+        # Estimate sensitivity elements
+        # This is a placeholder for sensitivity estimation logic
+        print("Estimating sensitivity elements...")
+
+    def _check_data(self, obj_dml_data):
+        if not isinstance(obj_dml_data, DoubleMLData):
+            raise TypeError('The data must be of DoubleMLData or DoubleMLClusterData type. '
+                            f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+        if obj_dml_data.z_cols is not None:
+            raise ValueError('Incompatible data. ' +
+                             ' and '.join(obj_dml_data.z_cols) +
+                             ' have been set as instrumental variable(s).')
+
+        # check if treatment level is valid
+        if np.sum(self.treated) < 5:
+            raise ValueError(
+                'The number of treated observations is less than 5. ' +
+                f'Number of treated observations: {np.sum(self.treated)} for treatment level {self.treatment_level}.'
+            )
+
+        return

From 545de586aeb0349386754bbf00ffd1cee6b6e97f Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Tue, 18 Jun 2024 16:18:09 +0200
Subject: [PATCH 04/98] Create test_apo_exceptions.py

---
 doubleml/irm/tests/test_apo_exceptions.py | 122 ++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 doubleml/irm/tests/test_apo_exceptions.py

diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
new file mode 100644
index 000000000..cf5227957
--- /dev/null
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -0,0 +1,122 @@
+import pytest
+import pandas as pd
+import numpy as np
+
+from doubleml import DoubleMLAPO, DoubleMLData
+from doubleml.datasets import make_irm_data_discrete_treatements, make_iivm_data
+
+from sklearn.linear_model import Lasso, LogisticRegression
+
+n = 100
+data_apo = make_irm_data_discrete_treatements(n_obs=n)
+df_apo = pd.DataFrame(np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])),
+                      columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])])
+
+dml_data = DoubleMLData(df_apo, 'y', 'd')
+
+ml_g = Lasso()
+ml_m = LogisticRegression()
+
+
+@pytest.mark.ci
+def test_apo_exception_data():
+    msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.'
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLAPO(pd.DataFrame(), ml_g, ml_m, treatment_level=0)
+
+    dml_data_z = make_iivm_data()
+    msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data_z, ml_g, ml_m, treatment_level=0)
+
+    msg = 'The number of treated observations is less than 5. Number of treated observations: 0 for treatment level 1.1.'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=1.1)
+
+
+@pytest.mark.ci
+def test_apo_exception_scores():
+    msg = 'Invalid score MAR. Valid score APO.'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score='MAR')
+
+
+@pytest.mark.ci
+def test_apo_exception_trimming_rule():
+    msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule='discard')
+
+    # check the trimming_threshold exceptions
+    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        trimming_rule='truncate', trimming_threshold="0.1")
+
+    msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        trimming_rule='truncate', trimming_threshold=0.6)
+
+
+@pytest.mark.ci
+def test_apo_exception_ipw_normalization():
+    msg = "Normalization indicator has to be boolean. Object of type <class 'int'> passed."
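+    # passing an int (1) instead of a bool should raise a TypeError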
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, normalize_ipw=1)
+
+
+@pytest.mark.ci
+def test_apo_exception_weights():
+    msg = "weights must be a numpy array or dictionary. weights of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=1)
+    msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights={'d': [1, 2, 3]})
+
+    # shape checks
+    msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=np.ones(1))
+    msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=np.ones((n, 2)))
+
+    msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights={'weights': np.ones(1), 'weights_bar': np.ones(1)})
+    msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights={'weights': np.ones((n, 2)), 'weights_bar': np.ones((n, 2))})
+    msg = rf"weights_bar must have shape \({n}, 1\). weights_bar of shape \({n}, 2\) was passed."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights={'weights': np.ones(n), 'weights_bar': np.ones((n, 2))})
+
+    # value checks
+    msg = "All weights values must be greater or equal 0."
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights=-1*np.ones(n,))
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights={'weights': -1*np.ones(n,), 'weights_bar': np.ones((n, 1))})
+    with pytest.raises(ValueError, match=msg):
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
+                        weights={'weights': np.ones(n,), 'weights_bar': -1*np.ones((n, 1))})
+
+    msg = "At least one weight must be non-zero."
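+    # all-zero weights must be rejected, whether passed as an array or inside a dict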
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, + weights=np.zeros((dml_data.d.shape[0], ))) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, + weights={'weights': np.zeros((dml_data.d.shape[0], )), + 'weights_bar': np.ones((dml_data.d.shape[0], 1))}) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, + weights={'weights': np.ones((dml_data.d.shape[0], )), + 'weights_bar': np.zeros((dml_data.d.shape[0], 1))}) From 4eaaa526f9aaf7f65aea2e925c1aac06959f6220 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:03:38 +0200 Subject: [PATCH 05/98] update irm sensitivity atte --- doubleml/irm/irm.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index eface6917..0fa6ec749 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -376,11 +376,7 @@ def _sensitivity_element_est(self, preds): m_hat = preds['predictions']['ml_m'] g_hat0 = preds['predictions']['ml_g0'] - if self.score == 'ATE': - g_hat1 = preds['predictions']['ml_g1'] - else: - assert self.score == 'ATTE' - g_hat1 = y + g_hat1 = preds['predictions']['ml_g1'] # use weights make this extendable weights, weights_bar = self._get_weights(m_hat=m_hat) From 825aa0c387a1b50fbd1e1d137485c8f6bb866120 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:11:42 +0200 Subject: [PATCH 06/98] Update _utils_irm_manual.py --- doubleml/irm/tests/_utils_irm_manual.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py index c249c9cf9..5fbdd174c 100644 --- a/doubleml/irm/tests/_utils_irm_manual.py +++ b/doubleml/irm/tests/_utils_irm_manual.py @@ -248,11 +248,7 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep): m_hat = predictions['ml_m'][:, i_rep, 0] g_hat0 = predictions['ml_g0'][:, i_rep, 0] - if score == 'ATE': - g_hat1 = predictions['ml_g1'][:, i_rep, 0] - else: - assert score == 'ATTE' - g_hat1 = y + g_hat1 = predictions['ml_g1'][:, i_rep, 0] if score == 'ATE': weights = np.ones_like(d) From 7c3f1c1553bca0b629afed637c7afe5915817a5f Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 17:14:04 +0200 Subject: [PATCH 07/98] add estimation and sensitivity to apo model --- doubleml/irm/apo.py | 175 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 161 insertions(+), 14 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index ce1dc3ff3..cfd8d60f1 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -1,11 +1,17 @@ import numpy as np +from sklearn.utils import check_X_y +from sklearn.utils.multiclass import type_of_target + from ..double_ml import DoubleML from ..double_ml_score_mixins import LinearScoreMixin from ..double_ml_data import DoubleMLData -from ..utils._checks import _check_score, _check_trimming, _check_weights +from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _cond_targets, _trimm, \ + _normalize_ipw +from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_finite_predictions, \ + _check_is_propensity class DoubleMLAPO(LinearScoreMixin, DoubleML): @@ -48,7 +54,6 @@ def __init__(self, ml_g_is_classifier = 
self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True) _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) self._learner = {'ml_g': ml_g, 'ml_m': ml_m} - self._normalize_ipw = normalize_ipw if ml_g_is_classifier: if obj_dml_data.binary_outcome: self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'} @@ -74,8 +79,6 @@ def __init__(self, _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep) self._initialize_weights(weights) - return self - @property def treatment_level(self): """ @@ -119,25 +122,169 @@ def weights(self): return self._weights def _initialize_ml_nuisance_params(self): - valid_learner = ['ml_g', 'ml_m'] + valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} - def _nuisance_est(self): - # Estimate nuisance parameters - # This is a placeholder for the estimation logic - print("Estimating nuisance parameters...") + def _initialize_weights(self, weights): + if weights is None: + weights = np.ones(self._dml_data.n_obs) + if isinstance(weights, np.ndarray): + self._weights = {'weights': weights} + else: + assert isinstance(weights, dict) + self._weights = weights + + def _get_weights(self, m_hat=None): + # standard case for APO/ATE + weights = self._weights['weights'] + if 'weights_bar' not in self._weights.keys(): + weights_bar = self._weights['weights'] + else: + weights_bar = self._weights['weights_bar'][:, self._i_rep] + + return weights, weights_bar + + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, + force_all_finite=False) + # use the treated indicator to get the correct sample splits + x, d = check_X_y(x, self.treated, + force_all_finite=False) + + # get train indices for d == treatment_level + smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) + g0_external = external_predictions['ml_g0'] is not None + g1_external = external_predictions['ml_g1'] is not None + m_external = external_predictions['ml_m'] is not None + + # nuisance g (g0 only relevant for sensitivity analysis) + if g0_external: + # use external predictions + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) + + if self._dml_data.binary_outcome: + binary_preds = (type_of_target(g_hat0['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + + if g1_external: + # use external predictions + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + + if self._dml_data.binary_outcome: + binary_preds = (type_of_target(g_hat1['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + + # nuisance m + if m_external: + # use external predictions + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + + # also trimm external predictions + m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + + psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], + m_hat['preds'], smpls) + psi_elements = {'psi_a': psi_a, + 'psi_b': psi_b} + + preds = {'predictions': {'ml_g0': g_hat0['preds'], + 'ml_g1': g_hat1['preds'], + 'ml_m': m_hat['preds']}, + 'targets': {'ml_g0': g_hat0['targets'], + 'ml_g1': g_hat1['targets'], + 'ml_m': m_hat['targets']}, + 'models': {'ml_g0': g_hat0['models'], + 'ml_g1': g_hat1['models'], + 'ml_m': m_hat['models']} + } + return psi_elements, preds + + def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') + if self.normalize_ipw: + m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat + + u_hat = y - g_hat1 + weights, weights_bar = self._get_weights(m_hat=m_hat_adj) + psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(d, u_hat), m_hat_adj) + psi_a = np.full_like(m_hat_adj, -1.0) + + return psi_a, psi_b + + def _sensitivity_element_est(self, preds): + # set elments for readability + y = self._dml_data.y + d = self.treated + + m_hat = preds['predictions']['ml_m'] + g_hat0 = preds['predictions']['ml_g0'] + g_hat1 = preds['predictions']['ml_g1'] + + weights, weights_bar = self._get_weights(m_hat=m_hat) + + sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0)) + sigma2 = np.mean(sigma2_score_element) + psi_sigma2 = sigma2_score_element - sigma2 + + # calc m(W,alpha) and Riesz representer + m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat))) + rr = np.multiply(weights_bar, np.divide(d, m_hat)) + + nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 
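+        # note: sigma2 averages the squared outcome residuals, while nu2 averages
+        # 2 * m(W, alpha) - alpha(W)^2 for the Riesz representer rr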
+ + element_dict = {'sigma2': sigma2, + 'nu2': nu2, + 'psi_sigma2': psi_sigma2, + 'psi_nu2': psi_nu2, + 'riesz_rep': rr, + } + return element_dict def _nuisance_tuning(self): # Tune nuisance parameters # This is a placeholder for tuning logic print("Tuning nuisance parameters...") - def _sensitivity_element_est(self): - # Estimate sensitivity elements - # This is a placeholder for sensitivity estimation logic - print("Estimating sensitivity elements...") - def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError('The data must be of DoubleMLData type. ' From 44d3100eefc9b6ca63ff9f5e045cc5cf55fb3932 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 18:19:25 +0200 Subject: [PATCH 08/98] rename estimation --- doubleml/irm/apo.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index cfd8d60f1..4c6298b9a 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -149,11 +149,11 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) # use the treated indicator to get the correct sample splits - x, d = check_X_y(x, self.treated, - force_all_finite=False) + x, treated = check_X_y(x, self.treated, + force_all_finite=False) # get train indices for d == treatment_level - smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) + smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated) g0_external = external_predictions['ml_g0'] is not None g1_external = external_predictions['ml_g1'] is not None m_external = external_predictions['ml_m'] is not None @@ -169,7 +169,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(treated == 0)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -191,7 +191,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples - g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(treated == 1)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') @@ -209,7 +209,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'targets': None, 'models': None} else: - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + m_hat = _dml_cv_predict(self._learner['ml_m'], x, treated, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) @@ -218,7 +218,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # also trimm external predictions m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) - psi_a, 
psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], + psi_a, psi_b = self._score_elements(y, treated, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], smpls) psi_elements = {'psi_a': psi_a, 'psi_b': psi_b} @@ -235,16 +235,16 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa } return psi_elements, preds - def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): + def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls): m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: - m_hat_adj = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, treated) else: m_hat_adj = m_hat u_hat = y - g_hat1 weights, weights_bar = self._get_weights(m_hat=m_hat_adj) - psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(d, u_hat), m_hat_adj) + psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj) psi_a = np.full_like(m_hat_adj, -1.0) return psi_a, psi_b @@ -252,7 +252,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): def _sensitivity_element_est(self, preds): # set elments for readability y = self._dml_data.y - d = self.treated + treated = self.treated m_hat = preds['predictions']['ml_m'] g_hat0 = preds['predictions']['ml_g0'] @@ -260,13 +260,13 @@ def _sensitivity_element_est(self, preds): weights, weights_bar = self._get_weights(m_hat=m_hat) - sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0)) + sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0-treated, g_hat0)) sigma2 = np.mean(sigma2_score_element) psi_sigma2 = sigma2_score_element - sigma2 # calc m(W,alpha) and Riesz representer m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat))) - rr = np.multiply(weights_bar, np.divide(d, m_hat)) + rr = np.multiply(weights_bar, np.divide(treated, m_hat)) nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr) nu2 = np.mean(nu2_score_element) From 592e6420a730059b5f25d998b0d150afb6447545 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 18:19:31 +0200 Subject: [PATCH 09/98] Create _utils_apo_manual.py --- doubleml/irm/tests/_utils_apo_manual.py | 125 ++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 doubleml/irm/tests/_utils_apo_manual.py diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py new file mode 100644 index 000000000..329f0830c --- /dev/null +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -0,0 +1,125 @@ +import numpy as np +from sklearn.base import clone, is_classifier + +from ...tests._utils_boot import boot_manual, draw_weights +from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search + +from ...utils._estimation import _normalize_ipw +from ...utils._checks import _check_is_propensity + + +def fit_apo(y, x, d, + learner_g, learner_m, treatment_level, all_smpls, score, + n_rep=1, g0_params=None, g1_params=None, m_params=None, + normalize_ipw=False, trimming_threshold=1e-2): + n_obs = len(y) + treated = (d == treatment_level) + + thetas = np.zeros(n_rep) + ses = np.zeros(n_rep) + all_g_hat0 = list() + all_g_hat1 = list() + all_m_hat = list() + + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + g_hat0, g_hat1, m_hat = fit_nuisance_apo(y, x, d, treated, + learner_g, learner_m, smpls, score, + g0_params=g0_params, g1_params=g1_params, m_params=m_params, + 
trimming_threshold=trimming_threshold) + + all_g_hat0.append(g_hat0) + all_g_hat1.append(g_hat1) + all_m_hat.append(m_hat) + + thetas[i_rep], ses[i_rep] = apo_dml2(y, x, d, treated, + g_hat0, g_hat1, m_hat, + smpls, score, normalize_ipw) + + theta = np.median(thetas) + se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) + + res = {'theta': theta, 'se': se, + 'thetas': thetas, 'ses': ses, + 'all_g_hat0': all_g_hat0, 'all_g_hat1': all_g_hat1, 'all_m_hat': all_m_hat} + + return res + + +def fit_nuisance_apo(y, x, d, treated, + learner_g, learner_m, smpls, score, + g0_params=None, g1_params=None, m_params=None, + trimming_threshold=1e-12): + ml_g0 = clone(learner_g) + ml_g1 = clone(learner_g) + + train_cond0 = np.where(treated == 0)[0] + if is_classifier(learner_g): + g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls, + train_cond=train_cond0) + else: + g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, + train_cond=train_cond0) + + train_cond1 = np.where(treated == 1)[0] + if is_classifier(learner_g): + g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls, + train_cond=train_cond1) + else: + g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, + train_cond=train_cond1) + + ml_m = clone(learner_m) + m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, + trimming_threshold=trimming_threshold) + + return g_hat0_list, g_hat1_list, m_hat_list + + +def compute_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, smpls): + u_hat0 = np.full_like(y, np.nan, dtype='float64') + u_hat1 = np.full_like(y, np.nan, dtype='float64') + g_hat0 = np.full_like(y, np.nan, dtype='float64') + g_hat1 = np.full_like(y, np.nan, dtype='float64') + m_hat = np.full_like(y, np.nan, dtype='float64') + for idx, (_, test_index) in enumerate(smpls): + u_hat0[test_index] = y[test_index] - g_hat0_list[idx] + u_hat1[test_index] = y[test_index] - g_hat1_list[idx] + g_hat0[test_index] = g_hat0_list[idx] + g_hat1[test_index] = g_hat1_list[idx] + m_hat[test_index] = m_hat_list[idx] + + _check_is_propensity(m_hat, 'learner_m', 'ml_m', smpls, eps=1e-12) + return u_hat0, u_hat1, g_hat0, g_hat1, m_hat + + +def apo_dml2(y, x, d, treated, g_hat0_list, g_hat1_list, m_hat_list, smpls, score, normalize_ipw): + n_obs = len(y) + u_hat0, u_hat1, g_hat0, g_hat1, m_hat = compute_residuals( + y, g_hat0_list, g_hat1_list, m_hat_list, smpls + ) + + if normalize_ipw: + m_hat_adj = _normalize_ipw(m_hat, treated) + else: + m_hat_adj = m_hat + + theta_hat = apo_orth(g_hat0, g_hat1, m_hat_adj, + u_hat0, u_hat1, treated, score) + + se = np.sqrt(var_apo(theta_hat, g_hat0, g_hat1, + m_hat_adj, + u_hat0, u_hat1, + treated, score, n_obs)) + + return theta_hat, se + + +def apo_orth(g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, treated, score): + res = np.mean(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat)) + return res + + +def var_apo(theta, g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, treated, score, n_obs): + var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat), 2)) + return var From fbc9e717b875d36d4447461d6114f56e8369fe0c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 19:27:46 +0200 Subject: [PATCH 10/98] first manual apo test --- doubleml/irm/tests/_utils_apo_manual.py | 4 +- doubleml/irm/tests/test_apo.py | 103 ++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 doubleml/irm/tests/test_apo.py diff --git 
a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 329f0830c..c7a9b4321 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -115,11 +115,11 @@ def apo_dml2(y, x, d, treated, g_hat0_list, g_hat1_list, m_hat_list, smpls, scor return theta_hat, se -def apo_orth(g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, treated, score): +def apo_orth(g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score): res = np.mean(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat)) return res -def var_apo(theta, g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, treated, score, n_obs): +def var_apo(theta, g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score, n_obs): var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat), 2)) return var diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py new file mode 100644 index 000000000..939310d99 --- /dev/null +++ b/doubleml/irm/tests/test_apo.py @@ -0,0 +1,103 @@ +import numpy as np +import pandas as pd +import pytest +import math + +from sklearn.base import clone + +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +import doubleml as dml +from doubleml.datasets import make_irm_data_discrete_treatements +from doubleml.utils.resampling import DoubleMLResampling + +from ...tests._utils import draw_smpls +from ._utils_apo_manual import fit_apo + + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshold): + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 499 + + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + n_obs = 100 + data_apo = make_irm_data_discrete_treatements(n_obs=n_obs) + y = data_apo['y'] + x = data_apo['x'] + d = data_apo['d'] + df_apo = pd.DataFrame( + np.column_stack((y, d, x)), + columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])] + ) + + dml_data = dml.DoubleMLData(df_apo, 'y', 'd') + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) + + np.random.seed(3141) + dml_obj = dml.DoubleMLAPO(dml_data, + ml_g, ml_m, + treatment_level=0, + n_folds=n_folds, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) + + # synchronize the sample splitting + dml_obj.set_sample_splitting(all_smpls=all_smpls) + dml_obj.fit() + + np.random.seed(3141) + res_manual = fit_apo(y, x, d, + clone(learner[0]), clone(learner[1]), + treatment_level=0, + all_smpls=all_smpls, + score='APO', + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold) + + res_dict = {'coef': dml_obj.coef, + 'coef_manual': res_manual['theta'], + 'coef_ext': dml_obj.coef_extern, + 'se': dml_obj.se, + 'se_manual': res_manual['se']} + + return res_dict + + +@pytest.mark.ci +def test_dml_apo_coef(dml_apo_fixture): 
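+    # the fitted coefficient should match the manual cross-fitting implementation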
+ assert math.isclose(dml_apo_fixture['coef'][0], + dml_apo_fixture['coef_manual'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_apo_fixture['coef'][0], + dml_apo_fixture['coef_ext'][0], + rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file From fab0ae2349414b8f096583ad18666e3ef616dfea Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:54:21 +0200 Subject: [PATCH 11/98] add external prediction test and se unit test --- doubleml/irm/apo.py | 6 ++-- doubleml/irm/tests/_utils_apo_manual.py | 2 +- doubleml/irm/tests/test_apo.py | 40 ++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 4c6298b9a..d841475af 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -135,7 +135,7 @@ def _initialize_weights(self, weights): assert isinstance(weights, dict) self._weights = weights - def _get_weights(self, m_hat=None): + def _get_weights(self): # standard case for APO/ATE weights = self._weights['weights'] if 'weights_bar' not in self._weights.keys(): @@ -243,7 +243,7 @@ def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls): m_hat_adj = m_hat u_hat = y - g_hat1 - weights, weights_bar = self._get_weights(m_hat=m_hat_adj) + weights, weights_bar = self._get_weights() psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj) psi_a = np.full_like(m_hat_adj, -1.0) @@ -258,7 +258,7 @@ def _sensitivity_element_est(self, preds): g_hat0 = preds['predictions']['ml_g0'] g_hat1 = preds['predictions']['ml_g1'] - weights, weights_bar = self._get_weights(m_hat=m_hat) + weights, weights_bar = self._get_weights() sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0-treated, g_hat0)) sigma2 = np.mean(sigma2_score_element) diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index c7a9b4321..38bb0870c 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -121,5 +121,5 @@ def apo_orth(g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score): def var_apo(theta, g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score, n_obs): - var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat), 2)) + var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat) - theta, 2)) return var diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 939310d99..1f96a5d51 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -18,7 +18,7 @@ @pytest.fixture(scope='module', params=[[LinearRegression(), - LogisticRegression(solver='lbfgs', max_iter=250)], + LogisticRegression(solver='lbfgs', max_iter=250, random_state=42)], [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) def learner(request): @@ -67,6 +67,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol ml_g, ml_m, treatment_level=0, n_folds=n_folds, + score='APO', normalize_ipw=normalize_ipw, draw_sample_splitting=False, trimming_threshold=trimming_threshold) @@ -84,11 +85,32 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) + np.random.seed(3141) + # test with external nuisance predictions + dml_obj_ext = 
dml.DoubleMLAPO(dml_data, + ml_g, ml_m, + treatment_level=0, + n_folds=n_folds, + score='APO', + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) + + # synchronize the sample splitting + dml_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + prediction_dict = {'d': {'ml_g0': dml_obj.predictions['ml_g0'].reshape(-1, 1), + 'ml_g1': dml_obj.predictions['ml_g1'].reshape(-1, 1), + 'ml_m': dml_obj.predictions['ml_m'].reshape(-1, 1)}} + dml_obj_ext.fit(external_predictions=prediction_dict) + + res_dict = {'coef': dml_obj.coef, 'coef_manual': res_manual['theta'], - 'coef_ext': dml_obj.coef_extern, + 'coef_ext': dml_obj_ext.coef, 'se': dml_obj.se, - 'se_manual': res_manual['se']} + 'se_manual': res_manual['se'], + 'se_ext': dml_obj_ext.se} return res_dict @@ -100,4 +122,14 @@ def test_dml_apo_coef(dml_apo_fixture): rel_tol=1e-9, abs_tol=1e-4) assert math.isclose(dml_apo_fixture['coef'][0], dml_apo_fixture['coef_ext'][0], - rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_se(dml_apo_fixture): + assert math.isclose(dml_apo_fixture['se'][0], + dml_apo_fixture['se_manual'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_apo_fixture['se'][0], + dml_apo_fixture['se_ext'][0], + rel_tol=1e-9, abs_tol=1e-4) From b814fce6f4135c9fa747384174489112efd69bea Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 13:57:26 +0200 Subject: [PATCH 12/98] add sensitivity and bootstrap test for apo --- doubleml/irm/tests/_utils_apo_manual.py | 80 ++++++++++++++++++++++ doubleml/irm/tests/test_apo.py | 89 +++++++++++++++++++++---- 2 files changed, 155 insertions(+), 14 deletions(-) diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 38bb0870c..24506cf0e 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -123,3 +123,83 @@ def apo_orth(g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score): def var_apo(theta, g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score, n_obs): var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat) - theta, 2)) return var + + +def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, + all_smpls, score, bootstrap, n_rep_boot, + n_rep=1, apply_cross_fitting=True, normalize_ipw=True): + treated = (d == treatment_level) + all_boot_t_stat = list() + for i_rep in range(n_rep): + smpls = all_smpls[i_rep] + if apply_cross_fitting: + n_obs = len(y) + else: + test_index = smpls[0][1] + n_obs = len(test_index) + weights = draw_weights(bootstrap, n_rep_boot, n_obs) + boot_t_stat = boot_apo_single_split( + thetas[i_rep], y, d, treated, + all_g_hat0[i_rep], all_g_hat1[i_rep], all_m_hat[i_rep], smpls, + score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting, normalize_ipw) + all_boot_t_stat.append(boot_t_stat) + + boot_t_stat = np.hstack(all_boot_t_stat) + + return boot_t_stat + + +def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_list, + smpls, score, se, weights, n_rep_boot, apply_cross_fitting, normalize_ipw): + _, u_hat1, _, g_hat1, m_hat = compute_residuals( + y, g_hat0_list, g_hat1_list, m_hat_list, smpls) + + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') + if normalize_ipw: + m_hat_adj = _normalize_ipw(m_hat, treated) + else: + m_hat_adj = m_hat + + J = -1.0 + psi = g_hat1 + 
np.divide(np.multiply(treated, u_hat1), m_hat_adj) - theta + boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) + + return boot_t_stat + + +def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, score, n_rep): + n_treat = 1 + n_obs = len(y) + treated = (d == treatment_level) + + sigma2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) + nu2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan) + psi_sigma2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) + psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan) + + for i_rep in range(n_rep): + + m_hat = predictions['ml_m'][:, i_rep, 0] + g_hat0 = predictions['ml_g0'][:, i_rep, 0] + g_hat1 = predictions['ml_g1'][:, i_rep, 0] + + weights = np.ones_like(d) + weights_bar = np.ones_like(d) + + sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0-treated, g_hat0)) + sigma2[0, i_rep, 0] = np.mean(sigma2_score_element) + psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0] + + # calc m(W,alpha) and Riesz representer + m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat))) + rr = np.multiply(weights_bar, np.divide(treated, m_hat)) + + nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr) + nu2[0, i_rep, 0] = np.mean(nu2_score_element) + psi_nu2[:, i_rep, 0] = nu2_score_element - nu2[0, i_rep, 0] + + element_dict = {'sigma2': sigma2, + 'nu2': nu2, + 'psi_sigma2': psi_sigma2, + 'psi_nu2': psi_nu2} + return element_dict diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 1f96a5d51..535f2397d 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -13,7 +13,7 @@ from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls -from ._utils_apo_manual import fit_apo +from ._utils_apo_manual import fit_apo, boot_apo, fit_sensitivity_elements_apo @pytest.fixture(scope='module', @@ -42,7 +42,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol boot_methods = ['normal'] n_folds = 2 n_rep_boot = 499 - + treatment_level = 0 # Set machine learning methods for m & g ml_g = clone(learner[0]) @@ -64,13 +64,13 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol np.random.seed(3141) dml_obj = dml.DoubleMLAPO(dml_data, - ml_g, ml_m, - treatment_level=0, - n_folds=n_folds, - score='APO', - normalize_ipw=normalize_ipw, - draw_sample_splitting=False, - trimming_threshold=trimming_threshold) + ml_g, ml_m, + treatment_level=treatment_level, + n_folds=n_folds, + score='APO', + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) # synchronize the sample splitting dml_obj.set_sample_splitting(all_smpls=all_smpls) @@ -79,7 +79,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol np.random.seed(3141) res_manual = fit_apo(y, x, d, clone(learner[0]), clone(learner[1]), - treatment_level=0, + treatment_level=treatment_level, all_smpls=all_smpls, score='APO', normalize_ipw=normalize_ipw, @@ -89,7 +89,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol # test with external nuisance predictions dml_obj_ext = dml.DoubleMLAPO(dml_data, ml_g, ml_m, - treatment_level=0, + treatment_level=treatment_level, n_folds=n_folds, score='APO', normalize_ipw=normalize_ipw, @@ -104,14 +104,45 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, 
trimming_threshol 'ml_m': dml_obj.predictions['ml_m'].reshape(-1, 1)}} dml_obj_ext.fit(external_predictions=prediction_dict) - res_dict = {'coef': dml_obj.coef, 'coef_manual': res_manual['theta'], 'coef_ext': dml_obj_ext.coef, 'se': dml_obj.se, 'se_manual': res_manual['se'], - 'se_ext': dml_obj_ext.se} - + 'se_ext': dml_obj_ext.se, + 'boot_methods': boot_methods} + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'], + res_manual['all_g_hat0'], res_manual['all_g_hat1'], + res_manual['all_m_hat'], + all_smpls, + score='APO', + bootstrap=bootstrap, + n_rep_boot=n_rep_boot, + normalize_ipw=normalize_ipw) + + np.random.seed(3141) + dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_obj_ext.boot_t_stat + + # check if sensitivity score with rho=0 gives equal asymptotic standard deviation + dml_obj.sensitivity_analysis(rho=0.0) + res_dict['sensitivity_ses'] = dml_obj.sensitivity_params['se'] + + # sensitivity tests + res_dict['sensitivity_elements'] = dml_obj.sensitivity_elements + res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_apo(y, d, + treatment_level, + all_coef=dml_obj.all_coef, + predictions=dml_obj.predictions, + score='APO', + n_rep=1) return res_dict @@ -133,3 +164,33 @@ def test_dml_apo_se(dml_apo_fixture): assert math.isclose(dml_apo_fixture['se'][0], dml_apo_fixture['se_ext'][0], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_boot(dml_apo_fixture): + for bootstrap in dml_apo_fixture['boot_methods']: + assert np.allclose(dml_apo_fixture['boot_t_stat' + bootstrap], + dml_apo_fixture['boot_t_stat' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apo_fixture['boot_t_stat' + bootstrap], + dml_apo_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_sensitivity_rho0(dml_apo_fixture): + assert np.allclose(dml_apo_fixture['se'], + dml_apo_fixture['sensitivity_ses']['lower'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apo_fixture['se'], + dml_apo_fixture['sensitivity_ses']['upper'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_sensitivity(dml_apo_fixture): + sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] + for sensitivity_element in sensitivity_element_names: + assert np.allclose(dml_apo_fixture['sensitivity_elements'][sensitivity_element], + dml_apo_fixture['sensitivity_elements_manual'][sensitivity_element], + rtol=1e-9, atol=1e-4) From 118f42ad4c4cedfefcebcfd0b5d985f1691e5c32 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:21:55 +0200 Subject: [PATCH 13/98] add test for external predictions in apo --- doubleml/irm/tests/_utils_apo_manual.py | 2 +- doubleml/irm/tests/test_apo.py | 1 - .../tests/test_apo_external_predictions.py | 96 +++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 doubleml/irm/tests/test_apo_external_predictions.py diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 24506cf0e..7b07caafe 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ 
b/doubleml/irm/tests/_utils_apo_manual.py @@ -2,7 +2,7 @@ from sklearn.base import clone, is_classifier from ...tests._utils_boot import boot_manual, draw_weights -from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search +from ...tests._utils import fit_predict, fit_predict_proba from ...utils._estimation import _normalize_ipw from ...utils._checks import _check_is_propensity diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 535f2397d..002c96cd4 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -10,7 +10,6 @@ import doubleml as dml from doubleml.datasets import make_irm_data_discrete_treatements -from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls from ._utils_apo_manual import fit_apo, boot_apo, fit_sensitivity_elements_apo diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py new file mode 100644 index 000000000..533d3bffd --- /dev/null +++ b/doubleml/irm/tests/test_apo_external_predictions.py @@ -0,0 +1,96 @@ +import pytest +import numpy as np +import pandas as pd +import math + +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLAPO, DoubleMLData +from doubleml.datasets import make_irm_data_discrete_treatements +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier + +from ...tests._utils import draw_smpls + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_apo_fixture(n_rep, set_ml_m_ext, set_ml_g_ext): + + score = "APO" + treatment_level = 0 + ext_predictions = {"d": {}} + + np.random.seed(3141) + n_obs = 500 + data_apo = make_irm_data_discrete_treatements(n_obs=n_obs) + df_apo = pd.DataFrame( + np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])), + columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])] + ) + + dml_data = DoubleMLData(df_apo, 'y', 'd') + d = data_apo['d'] + all_smpls = draw_smpls(n_obs, n_folds=5, n_rep=n_rep, groups=d) + + kwargs = { + "obj_dml_data": dml_data, + "score": score, + "treatment_level": treatment_level, + "n_rep": n_rep, + "draw_sample_splitting": False + } + + dml_obj = DoubleMLAPO(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_obj.set_sample_splitting(all_smpls=all_smpls) + + np.random.seed(3141) + dml_obj.fit(store_predictions=True) + + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = dml_obj.predictions["ml_m"][:, :, 0] + ml_m = DMLDummyClassifier() + else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: + ext_predictions["d"]["ml_g0"] = dml_obj.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_obj.predictions["ml_g1"][:, :, 0] + ml_g = DMLDummyRegressor() + else: + ml_g = LinearRegression() + + dml_obj_ext = DoubleMLAPO(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + np.random.seed(3141) + dml_obj_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef_normal": dml_obj.coef[0], + "coef_ext": dml_obj_ext.coef[0] + } + + return res_dict + + +@pytest.mark.ci +def test_doubleml_apo_coef(doubleml_apo_fixture): + assert math.isclose( + 
doubleml_apo_fixture["coef_normal"], + doubleml_apo_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4 + ) From f1845602b068e818080d256637b2a470446d6044 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:26:37 +0200 Subject: [PATCH 14/98] Update test_apo_external_predictions.py --- doubleml/irm/tests/test_apo_external_predictions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py index 533d3bffd..c60ee516a 100644 --- a/doubleml/irm/tests/test_apo_external_predictions.py +++ b/doubleml/irm/tests/test_apo_external_predictions.py @@ -27,7 +27,7 @@ def set_ml_g_ext(request): @pytest.fixture(scope="module") -def doubleml_apo_fixture(n_rep, set_ml_m_ext, set_ml_g_ext): +def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext): score = "APO" treatment_level = 0 @@ -87,10 +87,10 @@ def doubleml_apo_fixture(n_rep, set_ml_m_ext, set_ml_g_ext): @pytest.mark.ci -def test_doubleml_apo_coef(doubleml_apo_fixture): +def test_doubleml_apo_ext_coef(doubleml_apo_ext_fixture): assert math.isclose( - doubleml_apo_fixture["coef_normal"], - doubleml_apo_fixture["coef_ext"], + doubleml_apo_ext_fixture["coef_normal"], + doubleml_apo_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 ) From 827345b1634fb2a2dbc0a8bb02340ae0aa7eb8b1 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:31:47 +0200 Subject: [PATCH 15/98] add classifier unit test for apo --- doubleml/irm/tests/test_apo_classifier.py | 117 ++++++++++++++++++++++ doubleml/irm/tests/test_irm_classifier.py | 1 - 2 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 doubleml/irm/tests/test_apo_classifier.py diff --git a/doubleml/irm/tests/test_apo_classifier.py b/doubleml/irm/tests/test_apo_classifier.py new file mode 100644 index 000000000..860a61ef3 --- /dev/null +++ b/doubleml/irm/tests/test_apo_classifier.py @@ -0,0 +1,117 @@ +import numpy as np +import pytest +import math + +from sklearn.base import clone + +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestClassifier + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_apo_manual import fit_apo, boot_apo + + +@pytest.fixture(scope='module', + params=[[LogisticRegression(solver='lbfgs', max_iter=250), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.01, 0.05]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, trimming_threshold): + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 499 + + treatment_level = 0 + score = "APO" + + # collect data + (x, y, d) = generate_data_irm_binary + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + dml_obj = 
dml.DoubleMLAPO(obj_dml_data, + ml_g, ml_m, + treatment_level=treatment_level, + n_folds=n_folds, + score=score, + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold, + draw_sample_splitting=False) + # synchronize the sample splitting + dml_obj.set_sample_splitting(all_smpls=all_smpls) + dml_obj.fit() + + np.random.seed(3141) + res_manual = fit_apo(y, x, d, + clone(learner[0]), clone(learner[1]), + treatment_level, + all_smpls, score, + normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) + + res_dict = {'coef': dml_obj.coef, + 'coef_manual': res_manual['theta'], + 'se': dml_obj.se, + 'se_manual': res_manual['se'], + 'boot_methods': boot_methods} + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'], + res_manual['all_g_hat0'], res_manual['all_g_hat1'], + res_manual['all_m_hat'], + all_smpls, score, bootstrap, n_rep_boot, + normalize_ipw=normalize_ipw) + + np.random.seed(3141) + dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) + + return res_dict + + +@pytest.mark.ci +def test_dml_apo_coef(dml_apo_classifier_fixture): + assert math.isclose(dml_apo_classifier_fixture['coef'], + dml_apo_classifier_fixture['coef_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_se(dml_apo_classifier_fixture): + assert math.isclose(dml_apo_classifier_fixture['se'], + dml_apo_classifier_fixture['se_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_boot(dml_apo_classifier_fixture): + for bootstrap in dml_apo_classifier_fixture['boot_methods']: + assert np.allclose(dml_apo_classifier_fixture['boot_t_stat' + bootstrap], + dml_apo_classifier_fixture['boot_t_stat' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) diff --git a/doubleml/irm/tests/test_irm_classifier.py b/doubleml/irm/tests/test_irm_classifier.py index 46cdfb779..cfea434d0 100644 --- a/doubleml/irm/tests/test_irm_classifier.py +++ b/doubleml/irm/tests/test_irm_classifier.py @@ -1,4 +1,3 @@ - import numpy as np import pytest import math From 66f7df01953dde7f5ed3e7d4916e5777ae69e33d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:22:16 +0200 Subject: [PATCH 16/98] add tune with unit test for apo --- doubleml/irm/apo.py | 49 +++++++- doubleml/irm/tests/_utils_apo_manual.py | 22 +++- doubleml/irm/tests/test_apo_tune.py | 159 ++++++++++++++++++++++++ 3 files changed, 224 insertions(+), 6 deletions(-) create mode 100644 doubleml/irm/tests/test_apo_tune.py diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index d841475af..c37ced88f 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -8,7 +8,7 @@ from ..double_ml_score_mixins import LinearScoreMixin from ..double_ml_data import DoubleMLData -from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _cond_targets, _trimm, \ +from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _cond_targets, _trimm, \ _normalize_ipw from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_finite_predictions, \ _check_is_propensity @@ -280,10 +280,49 @@ def _sensitivity_element_est(self, preds): } return element_dict - def _nuisance_tuning(self): - # Tune nuisance parameters - # This is a placeholder for tuning logic - print("Tuning nuisance parameters...") + 
def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, + search_mode, n_iter_randomized_search): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, + force_all_finite=False) + x, treated = check_X_y(x, self.treated, + force_all_finite=False) + # get train indices for d == 0 and d == 1 + smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated) + + if scoring_methods is None: + scoring_methods = {'ml_g': None, + 'ml_m': None} + + train_inds = [train_index for (train_index, _) in smpls] + train_inds_d0 = [train_index for (train_index, _) in smpls_d0] + train_inds_d1 = [train_index for (train_index, _) in smpls_d1] + g0_tune_res = _dml_tune(y, x, train_inds_d0, + self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + g1_tune_res = list() + g1_tune_res = _dml_tune(y, x, train_inds_d1, + self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + m_tune_res = _dml_tune(treated, x, train_inds, + self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + g0_best_params = [xx.best_params_ for xx in g0_tune_res] + g1_best_params = [xx.best_params_ for xx in g1_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + + params = {'ml_g0': g0_best_params, + 'ml_g1': g1_best_params, + 'ml_m': m_best_params} + tune_res = {'g0_tune': g0_tune_res, + 'g1_tune': g1_tune_res, + 'm_tune': m_tune_res} + + res = {'params': params, + 'tune_res': tune_res} + + return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 7b07caafe..bc952be49 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -2,7 +2,7 @@ from sklearn.base import clone, is_classifier from ...tests._utils_boot import boot_manual, draw_weights -from ...tests._utils import fit_predict, fit_predict_proba +from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search from ...utils._estimation import _normalize_ipw from ...utils._checks import _check_is_propensity @@ -203,3 +203,23 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s 'psi_sigma2': psi_sigma2, 'psi_nu2': psi_nu2} return element_dict + + +def tune_nuisance_apo(y, x, d, treatment_level, ml_g, ml_m, smpls, score, n_folds_tune, + param_grid_g, param_grid_m): + train_cond0 = np.where(d != treatment_level)[0] + g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, + train_cond=train_cond0) + + train_cond1 = np.where(d == treatment_level)[0] + g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, + train_cond=train_cond1) + + treated = (d == treatment_level) + m_tune_res = tune_grid_search(treated, x, ml_m, smpls, param_grid_m, n_folds_tune) + + g0_best_params = [xx.best_params_ for xx in g0_tune_res] + g1_best_params = [xx.best_params_ for xx in g1_tune_res] + m_best_params = [xx.best_params_ for xx in m_tune_res] + + return g0_best_params, g1_best_params, m_best_params diff --git a/doubleml/irm/tests/test_apo_tune.py b/doubleml/irm/tests/test_apo_tune.py new file mode 100644 index 000000000..3a818fcae --- /dev/null +++ b/doubleml/irm/tests/test_apo_tune.py @@ -0,0 +1,159 @@ +import numpy as np +import pytest +import math + +from 
sklearn.base import clone + +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestRegressor + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_apo_manual import fit_apo, boot_apo, tune_nuisance_apo + + +@pytest.fixture(scope='module', + params=[RandomForestRegressor(random_state=42)]) +def learner_g(request): + return request.param + + +@pytest.fixture(scope='module', + params=[LogisticRegression(random_state=42)]) +def learner_m(request): + return request.param + + +@pytest.fixture(scope='module', + params=['APO']) +def score(request): + return request.param + + +@pytest.fixture(scope='module', + params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[True, False]) +def tune_on_folds(request): + return request.param + + +def get_par_grid(learner): + if learner.__class__ in [RandomForestRegressor]: + par_grid = {'n_estimators': [5, 10, 20]} + else: + assert learner.__class__ in [LogisticRegression] + par_grid = {'C': np.logspace(-4, 2, 10)} + return par_grid + + +@pytest.fixture(scope='module') +def dml_apo_tune_fixture(generate_data_irm, learner_g, learner_m, score, normalize_ipw, tune_on_folds): + par_grid = {'ml_g': get_par_grid(learner_g), + 'ml_m': get_par_grid(learner_m)} + n_folds_tune = 4 + + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 499 + treatment_level = 0 + + # collect data + (x, y, d) = generate_data_irm + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) + + # Set machine learning methods for m & g + ml_g = clone(learner_g) + ml_m = clone(learner_m) + + np.random.seed(3141) + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + dml_obj = dml.DoubleMLAPO(obj_dml_data, + ml_g, ml_m, + treatment_level=treatment_level, + n_folds=n_folds, + score=score, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False) + # synchronize the sample splitting + dml_obj.set_sample_splitting(all_smpls=all_smpls) + np.random.seed(3141) + # tune hyperparameters + tune_res = dml_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, + return_tune_res=False) + assert isinstance(tune_res, dml.DoubleMLAPO) + + dml_obj.fit() + + np.random.seed(3141) + smpls = all_smpls[0] + + if tune_on_folds: + g0_params, g1_params, m_params = tune_nuisance_apo(y, x, d, treatment_level, + clone(learner_g), clone(learner_m), smpls, score, + n_folds_tune, + par_grid['ml_g'], par_grid['ml_m']) + else: + xx = [(np.arange(len(y)), np.array([]))] + g0_params, g1_params, m_params = tune_nuisance_apo(y, x, d, treatment_level, + clone(learner_g), clone(learner_m), xx, score, + n_folds_tune, + par_grid['ml_g'], par_grid['ml_m']) + g0_params = g0_params * n_folds + m_params = m_params * n_folds + g1_params = g1_params * n_folds + + res_manual = fit_apo(y, x, d, clone(learner_g), clone(learner_m), + treatment_level, + all_smpls, score, + normalize_ipw=normalize_ipw, + g0_params=g0_params, g1_params=g1_params, m_params=m_params) + + res_dict = {'coef': dml_obj.coef, + 'coef_manual': res_manual['theta'], + 'se': dml_obj.se, + 'se_manual': res_manual['se'], + 'boot_methods': boot_methods} + + for bootstrap in boot_methods: + np.random.seed(3141) + boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'], + res_manual['all_g_hat0'], res_manual['all_g_hat1'], + res_manual['all_m_hat'], + all_smpls, score, bootstrap, n_rep_boot, + normalize_ipw=normalize_ipw) + + np.random.seed(3141) + 
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1) + + return res_dict + + +@pytest.mark.ci +def test_dml_apo_tune_coef(dml_apo_tune_fixture): + assert math.isclose(dml_apo_tune_fixture['coef'], + dml_apo_tune_fixture['coef_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_tune_se(dml_apo_tune_fixture): + assert math.isclose(dml_apo_tune_fixture['se'], + dml_apo_tune_fixture['se_manual'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_tune_boot(dml_apo_tune_fixture): + for bootstrap in dml_apo_tune_fixture['boot_methods']: + assert np.allclose(dml_apo_tune_fixture['boot_t_stat' + bootstrap], + dml_apo_tune_fixture['boot_t_stat' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) From a43ab19a9ad076d82227db7cbfdb0d1df83f858f Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:55:36 +0200 Subject: [PATCH 17/98] Create test_apo_weighted_scores.py --- .../irm/tests/test_apo_weighted_scores.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 doubleml/irm/tests/test_apo_weighted_scores.py diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py new file mode 100644 index 000000000..36de26811 --- /dev/null +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -0,0 +1,94 @@ +import pytest +import numpy as np + +from sklearn.base import clone +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +from ...tests._utils import draw_smpls +import doubleml as dml + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=['APO']) +def score(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0, 1]) +def treatment_level(request): + return request.param + + +@pytest.fixture(scope='module') +def weighted_apo_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold, + treatment_level): + n_folds = 2 + + # collect data + (x, y, d) = generate_data_irm + n_obs = len(y) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + dml_obj = dml.DoubleMLAPO(obj_dml_data, + ml_g, ml_m, + treatment_level, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold, + draw_sample_splitting=False) + dml_obj.set_sample_splitting(all_smpls=all_smpls) + dml_obj.fit() + + weights = 0.5 * np.ones_like(obj_dml_data.y) + dml_obj_weighted = dml.DoubleMLAPO(obj_dml_data, + ml_g, ml_m, + treatment_level, + n_folds, + score=score, + weights=weights, + normalize_ipw=normalize_ipw, + 
trimming_threshold=trimming_threshold, + draw_sample_splitting=False) + dml_obj_weighted.set_sample_splitting(all_smpls=all_smpls) + dml_obj_weighted.fit() + + result_dict = { + 'coef': dml_obj.coef, + 'weighted_coef': dml_obj_weighted.coef, + } + return result_dict + + +@pytest.mark.ci +def test_apo_weighted_coef(weighted_apo_score_fixture): + assert np.allclose(0.5 * weighted_apo_score_fixture['coef'], + weighted_apo_score_fixture['weighted_coef']) From aed2227f2c9a1d06f3f4ccc4a4ee24de5f0d29d8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:02:16 +0200 Subject: [PATCH 18/98] Update test_apo_weighted_scores.py --- doubleml/irm/tests/test_apo_weighted_scores.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py index 36de26811..94d81170c 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -8,6 +8,7 @@ from ...tests._utils import draw_smpls import doubleml as dml + @pytest.fixture(scope='module', params=[[LinearRegression(), LogisticRegression(solver='lbfgs', max_iter=250)], From 8f15923660f9277ac3487b8ccebcce61225812e9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:30:32 +0200 Subject: [PATCH 19/98] adding capo and gapo to apo class --- doubleml/irm/apo.py | 70 ++++++++++++++++++++++++++++++++++ doubleml/irm/tests/test_apo.py | 56 +++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 3 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index c37ced88f..e530625de 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -1,10 +1,13 @@ import numpy as np +import pandas as pd +import warnings from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target from ..double_ml import DoubleML +from ..utils.blp import DoubleMLBLP from ..double_ml_score_mixins import LinearScoreMixin from ..double_ml_data import DoubleMLData @@ -341,3 +344,70 @@ def _check_data(self, obj_dml_data): ) return + + def capo(self, basis, is_gate=False): + """ + Calculate conditional average potential outcomes (CAPO) for a given basis. + + Parameters + ---------- + basis : :class:`pandas.DataFrame` + The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, + where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + is_gate : bool + Indicates whether the basis is constructed for GATE/GAPOs (dummy-basis). + Default is ``False``. + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model. + """ + valid_score = ['APO'] + if self.score not in valid_score: + raise ValueError('Invalid score ' + self.score + '. ' + + 'Valid score ' + ' or '.join(valid_score) + '.') + + if self.n_rep != 1: + raise NotImplementedError('Only implemented for one repetition. ' + + f'Number of repetitions is {str(self.n_rep)}.') + + # define the orthogonal signal + orth_signal = self.psi_elements['psi_b'].reshape(-1) + # fit the best linear predictor + model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate) + model.fit() + return model + + def gapo(self, groups): + """ + Calculate group average potential outcomes (GAPO) for groups. + + Parameters + ---------- + groups : :class:`pandas.DataFrame` + The group indicator for estimating the best linear predictor. Groups should be mutually exclusive. 
+ Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations + and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model for group average potential outcomes. + """ + if not isinstance(groups, pd.DataFrame): + raise TypeError('Groups must be of DataFrame type. ' + f'Groups of type {str(type(groups))} was passed.') + + if not all(groups.dtypes == bool) or all(groups.dtypes == int): + if groups.shape[1] == 1: + groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_') + else: + raise TypeError('Columns of groups must be of bool type or int type (dummy coded). ' + 'Alternatively, groups should only contain one column.') + + if any(groups.sum(0) <= 5): + warnings.warn('At least one group effect is estimated with less than 6 observations.') + + model = self.capo(groups, is_gate=True) + return model diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 002c96cd4..20effa0cb 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -9,7 +9,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatements +from doubleml.datasets import make_irm_data_discrete_treatements, make_irm_data from ...tests._utils import draw_smpls from ._utils_apo_manual import fit_apo, boot_apo, fit_sensitivity_elements_apo @@ -36,12 +36,17 @@ def trimming_threshold(request): return request.param +@pytest.fixture(scope='module', + params=[0, 1]) +def treatment_level(request): + return request.param + + @pytest.fixture(scope='module') -def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshold): +def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshold, treatment_level): boot_methods = ['normal'] n_folds = 2 n_rep_boot = 499 - treatment_level = 0 # Set machine learning methods for m & g ml_g = clone(learner[0]) @@ -193,3 +198,48 @@ def test_dml_apo_sensitivity(dml_apo_fixture): assert np.allclose(dml_apo_fixture['sensitivity_elements'][sensitivity_element], dml_apo_fixture['sensitivity_elements_manual'][sensitivity_element], rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_apo_capo_gapo(treatment_level): + n = 20 + # collect data + np.random.seed(42) + obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + + # First stage estimation + ml_g = RandomForestRegressor(n_estimators=10) + ml_m = RandomForestClassifier(n_estimators=10) + + dml_obj = dml.DoubleMLAPO(obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + treatment_level=treatment_level, + trimming_threshold=0.05, + n_folds=5) + + dml_obj.fit() + # create a random basis + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) + capo = dml_obj.capo(random_basis) + assert isinstance(capo, dml.utils.blp.DoubleMLBLP) + assert isinstance(capo.confint(), pd.DataFrame) + + groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= -1.0, + obj_dml_data.data['X1'] > 0.2]), + columns=['Group 1', 'Group 2']) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gapo_1 = dml_obj.gapo(groups_1) + assert isinstance(gapo_1, dml.utils.blp.DoubleMLBLP) + assert isinstance(gapo_1.confint(), pd.DataFrame) + assert all(gapo_1.confint().index == groups_1.columns.to_list()) + + np.random.seed(42) + groups_2 = 
pd.DataFrame(np.random.choice(["1", "2"], n, p=[0.1, 0.9])) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gapo_2 = dml_obj.gapo(groups_2) + assert isinstance(gapo_2, dml.utils.blp.DoubleMLBLP) + assert isinstance(gapo_2.confint(), pd.DataFrame) + assert all(gapo_2.confint().index == ["Group_1", "Group_2"]) From 134fc4ced66d7ada89ada97da116de066bcd7c15 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:43:18 +0200 Subject: [PATCH 20/98] small fixes to remove unnecessary lines --- doubleml/irm/apo.py | 2 -- doubleml/irm/irm.py | 2 -- doubleml/irm/tests/_utils_apo_manual.py | 1 - 3 files changed, 5 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index e530625de..76a0372f1 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -239,7 +239,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls): - m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: m_hat_adj = _normalize_ipw(m_hat, treated) else: @@ -302,7 +301,6 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ g0_tune_res = _dml_tune(y, x, train_inds_d0, self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g1_tune_res = list() g1_tune_res = _dml_tune(y, x, train_inds_d1, self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 0fa6ec749..3cf98ec36 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -340,7 +340,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): - m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: m_hat_adj = _normalize_ipw(m_hat, d) else: @@ -420,7 +419,6 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ g0_tune_res = _dml_tune(y, x, train_inds_d0, self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g1_tune_res = list() g1_tune_res = _dml_tune(y, x, train_inds_d1, self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index bc952be49..862a2793d 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -154,7 +154,6 @@ def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_ _, u_hat1, _, g_hat1, m_hat = compute_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, smpls) - m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: m_hat_adj = _normalize_ipw(m_hat, treated) else: From 0d5ecd5e1b40bbb3016503b983e9d21c4496ee11 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:25:33 +0200 Subject: [PATCH 21/98] update dgp --- doubleml/datasets.py | 149 +++++++++++++++++++++++++-------- doubleml/irm/tests/test_apo.py | 2 +- 2 files changed, 117 insertions(+), 34 deletions(-) diff --git 
a/doubleml/datasets.py b/doubleml/datasets.py
index cfbebdd99..346d79376 100644
--- a/doubleml/datasets.py
+++ b/doubleml/datasets.py
@@ -1435,60 +1435,143 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM
         raise ValueError('Invalid return_type.')
 
 
-def make_irm_data_discrete_treatements(n_obs=200, p=10, support_size=5, n_levels=3, random_state=42):
+def make_irm_data_discrete_treatements(n_obs=200, n_levels=3, random_state=42, **kwargs):
     """
-    Generates data from a interactive regression (IRM) model with multiple treatment levels.
+    Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an
+    underlying continous treatment).
+
+    The data generating process is defined as follows (similar to the Monte Carlo simulation used
+    in Sant'Anna and Zhao (2020)).
+
+    Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds
+    to the identity matrix.
+    Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`,
+    where
+
+    .. math::
+
+        \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1)
+
+        \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1))
+
+        \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3
+
+        \\tilde{Z}_4 &= (20 + X_2 + X_4)^2
+
+        \\tilde{Z}_5 &= X_5.
+
+    A continuous treatment :math:`D_{\\text{cont}}` is generated as
+
+    .. math::
+
+        D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D,
+
+    where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment
+    effect is defined as
+
+    .. math::
+
+        \\theta(d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2.
+
+    Based on the continous treatment, a discrete treatment :math:`D` is generated with a baseline level of
+    :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels
+    is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected.
+
+    The potential outcomes are defined as
+
+    .. math::
+
+        Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y
+
+        Y(1) &= \\theta(D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0),
+
+    where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as
+
+    .. math::
+
+        Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}.
+
+    The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``.
+
+    Parameters
+    ----------
+    n_obs : int
+        The number of observations to simulate.
+        Default is ``200``.
+
+    n_levels : int
+        The number of treatment levels.
+        Default is ``3``.
+
+    random_state : int
+        Random seed for reproducibility.
+        Default is ``42``.
+
+    Returns
+    -------
+    res_dict : dictionary
+        Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``.
+ """ np.random.seed(random_state) + xi = kwargs.get('xi', 0.3) + c = kwargs.get('c', 0.0) + dim_x = kwargs.get('dim_x', 5) - # define continous treatment effect - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) - # Outcome support and coefficients - support_y = np.random.choice(np.arange(p), size=support_size, replace=False) - coefs_y = np.random.uniform(0, 1, size=support_size) - # treatment support and coefficients - support_d = support_y - range_coefs_d = [0.2, 0.3] - coefs_d = np.random.uniform(range_coefs_d[0], range_coefs_d[1], size=support_size) + def f_reg(w): + res = 210 + 27.4*w[:, 0] + 13.7*(w[:, 1] + w[:, 2] + w[:, 3]) + return res - # noise - epsilon = np.random.uniform(-1, 1, size=n_obs) + def f_treatment(w, xi): + res = xi * (-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3]) + return res - # Generate controls, covariates, treatments and outcomes - x = np.random.uniform(0, 1, size=(n_obs, p)) - # Heterogeneous treatment effects - te = treatment_effect(x) + def treatment_effect(d): + return 0.1 * np.exp(d) + 10 * np.sin(0.7 * d) + 2 * d - 0.2 * np.square(d) + + z_tilde_1 = np.exp(0.5*x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - # set d to be a discrete number of levels - range_cont_d = support_size * range_coefs_d - # devide the range into n_levels - levels = np.linspace(range_cont_d[0], range_cont_d[1], n_levels - 1) + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - # define a discrete treatment version (with a baseline probability) + cont_d = f_treatment(z, xi) + eps_d + level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) + potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 eta = np.random.uniform(0, 1, size=n_obs) - potential_level = sum([1.0 * (np.dot(x[:, support_d], coefs_d) >= level) for level in levels]) + 1 - d = 1.0 * (eta >= 1/n_levels) * potential_level + observed_d = 1.0 * (eta >= 1/n_levels) * potential_level + ite = treatment_effect(cont_d) + y0 = f_reg(z) + eps_y # only treated for d > 0 compared to the baseline - y = te * (d > 0) + np.dot(x[:, support_y], coefs_y) + epsilon + y = ite * (observed_d > 0) + y0 oracle_values = { - 'levels': levels, - 'support_y': support_y, - 'coefs_y': coefs_y, - 'support_d': support_d, - 'coefs_d': coefs_d, - 'te': te, + 'cont_d': cont_d, + 'level_bounds': level_bounds, + 'potential_level': potential_level, + 'ite': ite, + 'y0': y0, 'treatment_effect': treatment_effect } resul_dict = { 'x': x, 'y': y, - 'd': d, + 'd': observed_d, 'oracle_values': oracle_values } diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 20effa0cb..e365bc1e3 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -53,7 +53,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol ml_m = clone(learner[1]) np.random.seed(3141) - n_obs = 100 + n_obs = 500 data_apo = make_irm_data_discrete_treatements(n_obs=n_obs) y = 
data_apo['y'] x = data_apo['x'] From b86f8aaf4e5ac6c54c6c66c7f73e36d5f06386b3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:32:47 +0200 Subject: [PATCH 22/98] fix typo --- doubleml/datasets.py | 2 +- doubleml/irm/tests/test_apo.py | 4 ++-- doubleml/irm/tests/test_apo_exceptions.py | 4 ++-- doubleml/irm/tests/test_apo_external_predictions.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 346d79376..ea782bda5 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1435,7 +1435,7 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM raise ValueError('Invalid return_type.') -def make_irm_data_discrete_treatements(n_obs=200, n_levels=3, random_state=42, **kwargs): +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=42, **kwargs): """ Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an underlying continous treatment). diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index e365bc1e3..7082e399b 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -9,7 +9,7 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatements, make_irm_data +from doubleml.datasets import make_irm_data_discrete_treatments, make_irm_data from ...tests._utils import draw_smpls from ._utils_apo_manual import fit_apo, boot_apo, fit_sensitivity_elements_apo @@ -54,7 +54,7 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol np.random.seed(3141) n_obs = 500 - data_apo = make_irm_data_discrete_treatements(n_obs=n_obs) + data_apo = make_irm_data_discrete_treatments(n_obs=n_obs) y = data_apo['y'] x = data_apo['x'] d = data_apo['d'] diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index cf5227957..ccab71855 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -3,12 +3,12 @@ import numpy as np from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatements, make_iivm_data +from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data from sklearn.linear_model import Lasso, LogisticRegression n = 100 -data_apo = make_irm_data_discrete_treatements(n_obs=n) +data_apo = make_irm_data_discrete_treatments(n_obs=n) df_apo = pd.DataFrame(np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])), columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])]) diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py index c60ee516a..a3f77dea1 100644 --- a/doubleml/irm/tests/test_apo_external_predictions.py +++ b/doubleml/irm/tests/test_apo_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatements +from doubleml.datasets import make_irm_data_discrete_treatments from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier from ...tests._utils import draw_smpls @@ -35,7 +35,7 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext): np.random.seed(3141) n_obs = 500 - data_apo = 
make_irm_data_discrete_treatements(n_obs=n_obs) + data_apo = make_irm_data_discrete_treatments(n_obs=n_obs) df_apo = pd.DataFrame( np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])), columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])] From 70b1b00253a1607467aca5187fd1f325fa229b94 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:35:14 +0200 Subject: [PATCH 23/98] remove seed from dgp --- doubleml/datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index ea782bda5..fe06ad3d1 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1435,7 +1435,7 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM raise ValueError('Invalid return_type.') -def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=42, **kwargs): +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=None, **kwargs): """ Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an underlying continous treatment). @@ -1513,8 +1513,8 @@ def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=42, ** Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. """ - - np.random.seed(random_state) + if random_state is not None: + np.random.seed(random_state) xi = kwargs.get('xi', 0.3) c = kwargs.get('c', 0.0) dim_x = kwargs.get('dim_x', 5) From 5be62d62db187e7a2070dfc3edfcbc407608f4ff Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:40:51 +0200 Subject: [PATCH 24/98] Add basic unit tests for dgp --- doubleml/datasets.py | 6 +++++- doubleml/tests/test_datasets.py | 34 ++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index fe06ad3d1..fd18affdc 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1519,6 +1519,11 @@ def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=None, c = kwargs.get('c', 0.0) dim_x = kwargs.get('dim_x', 5) + if not isinstance(n_levels, int): + raise ValueError('n_levels must be an integer.') + if n_levels < 2: + raise ValueError('n_levels must be at least 2.') + # observed covariates cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ]) @@ -1565,7 +1570,6 @@ def treatment_effect(d): 'potential_level': potential_level, 'ite': ite, 'y0': y0, - 'treatment_effect': treatment_effect } resul_dict = { diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index d662cd075..a46754d05 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -5,7 +5,8 @@ from doubleml import DoubleMLData, DoubleMLClusterData from doubleml.datasets import fetch_401K, fetch_bonus, make_plr_CCDDHNR2018, make_plr_turrell2018, \ make_irm_data, make_iivm_data, _make_pliv_data, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, \ - make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data, make_heterogeneous_data, make_ssm_data + make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data, make_heterogeneous_data, make_ssm_data, \ + make_irm_data_discrete_treatments msg_inv_return_type = 'Invalid return_type.' 
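[Editor's note: the assertions added below only pin down the dictionary layout returned by the generator. As orientation, a minimal usage sketch; this is not part of the patch series, it simply mirrors the fixtures used in the APO tests above, and the seed and sample size are illustrative:]

    import numpy as np
    import pandas as pd

    from doubleml import DoubleMLData
    from doubleml.datasets import make_irm_data_discrete_treatments

    # draw a sample; d == 0 is the untreated baseline, positive values index the levels
    data = make_irm_data_discrete_treatments(n_obs=500, n_levels=3, random_state=42)

    # the generator returns plain arrays, so wrap them for the DoubleML estimators
    df = pd.DataFrame(
        np.column_stack((data['y'], data['d'], data['x'])),
        columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
    )
    dml_data = DoubleMLData(df, 'y', 'd')

    # oracle quantities for simulation studies, e.g. individual effects and level bounds
    ite = data['oracle_values']['ite']
    level_bounds = data['oracle_values']['level_bounds']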
@@ -277,3 +278,34 @@ def test_make_ssm_data_return_types():
     assert isinstance(s, np.ndarray)
     with pytest.raises(ValueError, match=msg_inv_return_type):
         _ = make_ssm_data(n_obs=100, return_type='matrix')
+
+
+@pytest.fixture(scope='function',
+                params=[3, 5])
+def n_levels(request):
+    return request.param
+
+
+def test_make_data_discrete_treatments(n_levels):
+    np.random.seed(3141)
+    n = 100
+    data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=n_levels)
+    assert isinstance(data_apo, dict)
+    assert isinstance(data_apo['y'], np.ndarray)
+    assert isinstance(data_apo['d'], np.ndarray)
+    assert isinstance(data_apo['x'], np.ndarray)
+    assert isinstance(data_apo['oracle_values'], dict)
+
+    assert isinstance(data_apo['oracle_values']['cont_d'], np.ndarray)
+    assert isinstance(data_apo['oracle_values']['level_bounds'], np.ndarray)
+    assert isinstance(data_apo['oracle_values']['potential_level'], np.ndarray)
+    assert isinstance(data_apo['oracle_values']['ite'], np.ndarray)
+    assert isinstance(data_apo['oracle_values']['y0'], np.ndarray)
+
+    msg = 'n_levels must be at least 2.'
+    with pytest.raises(ValueError, match=msg):
+        _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1)
+
+    msg = 'n_levels must be an integer.'
+    with pytest.raises(ValueError, match=msg):
+        _ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1)

From 2a1903f00c26ac1b0f8b46df2d9b4bc7f95bfaec Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:10:03 +0200
Subject: [PATCH 25/98] add docstring for APO model

---
 doubleml/irm/apo.py | 57 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 76a0372f1..7fae6c292 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -18,9 +18,64 @@
 
 
 class DoubleMLAPO(LinearScoreMixin, DoubleML):
-    """Double machine learning average potential outcomes for interactive regression models
+    """Double machine learning average potential outcomes for interactive regression models.
 
     Parameters
+    ----------
+    obj_dml_data : :class:`DoubleMLData` object
+        The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model.
+
+    ml_g : estimator implementing ``fit()`` and ``predict()``
+        A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
+        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(D,X) = E[Y|X,D]`.
+        For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and
+        ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``,
+        ``predict_proba()`` is used otherwise ``predict()``.
+
+    ml_m : classifier implementing ``fit()`` and ``predict_proba()``
+        A machine learner implementing ``fit()`` and ``predict_proba()`` methods (e.g.
+        :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = E[D|X]`.
+
+    treatment_level : int or float
+        Chosen treatment level for average potential outcomes.
+
+    n_folds : int
+        Number of folds.
+        Default is ``5``.
+
+    n_rep : int
+        Number of repetitions for the sample splitting.
+        Default is ``1``.
+
+    score : str or callable
+        A str (``'APO'``) specifying the score function.
+        Default is ``'APO'``.
+
+    weights : array, dict or None
+        A numpy array of weights for each individual observation. If None, then the ``'APO'`` score
+        is applied (corresponds to weights equal to 1).
+        An array has to be of shape ``(n,)``, where ``n`` is the number of observations.
+        A dictionary can be used to specify weights which depend on the treatment variable.
+        In this case, the dictionary has to contain two keys ``weights`` and ``weights_bar``, where the values
+        have to be arrays of shape ``(n,)`` and ``(n, n_rep)``.
+        Default is ``None``.
+
+    normalize_ipw : bool
+        Indicates whether the inverse probability weights are normalized.
+        Default is ``False``.
+
+    trimming_rule : str
+        A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Default is ``'truncate'``.
+
+    trimming_threshold : float
+        The threshold used for trimming.
+        Default is ``1e-2``.
+
+    draw_sample_splitting : bool
+        Indicates whether the sample splitting should be drawn during initialization of the object.
+        Default is ``True``.
+
     """
     def __init__(self,
                  obj_dml_data,

From 87cfa7665cacecb96ed535074a58e509f57caa5c Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 12:51:32 +0200
Subject: [PATCH 26/98] add warning for low percentage of treatment level

---
 doubleml/irm/apo.py                       |  4 ++++
 doubleml/irm/tests/test_apo_exceptions.py | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 7fae6c292..423f3fa7e 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -396,6 +396,10 @@ def _check_data(self, obj_dml_data):
                 f'Number of treated observations: {np.sum(self.treated)} for treatment level {self.treatment_level}.'
             )
 
+        if np.mean(self.treated) <= 0.05:
+            warnings.warn(f'The proportion of observations with treatment level {self.treatment_level} is less than 5%.'
+                          f' Got {np.mean(self.treated) * 100:.2f}%.')
+
         return
diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
index ccab71855..2e9a7a1bd 100644
--- a/doubleml/irm/tests/test_apo_exceptions.py
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -33,6 +33,19 @@ def test_apo_exception_data():
     with pytest.raises(ValueError, match=msg):
         _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=1.1)
 
+    msg = r'The proportion of observations with treatment level 42 is less than 5\%. Got 0.70\%.'
+    # test warning
+    with pytest.warns(UserWarning, match=msg):
+        data_apo_warn = make_irm_data_discrete_treatments(n_obs=1000)
+        data_apo_warn['d'][0:7] = 42
+        df_apo_warn = pd.DataFrame(
+            np.column_stack((data_apo_warn['y'], data_apo_warn['d'], data_apo_warn['x'])),
+            columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo_warn['x'].shape[1])]
+        )
+        dml_data_warn = DoubleMLData(df_apo_warn, 'y', 'd')
+
+        _ = DoubleMLAPO(dml_data_warn, ml_g, ml_m, treatment_level=42)
+

From 5c0501c8e0d0c751c9d53a14db91f47b87040664 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 21 Jun 2024 13:22:06 +0200
Subject: [PATCH 27/98] remove double check

---
 doubleml/irm/qte.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py
index 9fd220f19..9dd88def8 100644
--- a/doubleml/irm/qte.py
+++ b/doubleml/irm/qte.py
@@ -138,7 +138,6 @@ def __init__(self,
         self._trimming_threshold = trimming_threshold
         _check_trimming(self._trimming_rule, self._trimming_threshold)
 
-        self._check_quantile()
         if not isinstance(self.normalize_ipw, bool):
            raise TypeError('Normalization indicator has to be boolean. 
' + f'Object of type {str(type(self.normalize_ipw))} passed.') From 1506474816ef516e88f0d310976f97a5a0a80535 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:44:46 +0200 Subject: [PATCH 28/98] remove self_i_quant from qte --- doubleml/irm/qte.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index 9dd88def8..8609a31af 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -411,14 +411,13 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ framework_list = [None] * self.n_quantiles for i_quant in range(self.n_quantiles): - self._i_quant = i_quant # save the parallel fitted models in the right list - self._modellist_0[self._i_quant] = fitted_models[self._i_quant][0] - self._modellist_1[self._i_quant] = fitted_models[self._i_quant][1] + self._modellist_0[i_quant] = fitted_models[i_quant][0] + self._modellist_1[i_quant] = fitted_models[i_quant][1] # set up the framework - framework_list[self._i_quant] = self._modellist_1[self._i_quant].framework - \ - self._modellist_0[self._i_quant].framework + framework_list[i_quant] = self._modellist_1[i_quant].framework - \ + self._modellist_0[i_quant].framework # aggregate all frameworks self._framework = concat(framework_list) @@ -558,7 +557,6 @@ def _initialize_models(self): 'draw_sample_splitting': False } for i_quant in range(self.n_quantiles): - self._i_quant = i_quant # initialize models for both potential quantiles if self.score == 'PQ': From 64a0e20bb63c386c85ce8fa6855f8b63b5058b12 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:44:56 +0200 Subject: [PATCH 29/98] add first apos model --- doubleml/__init__.py | 40 +-- doubleml/irm/apos.py | 293 +++++++++++++++++++++ doubleml/irm/tests/test_apos_exceptions.py | 72 +++++ 3 files changed, 387 insertions(+), 18 deletions(-) create mode 100644 doubleml/irm/apos.py create mode 100644 doubleml/irm/tests/test_apos_exceptions.py diff --git a/doubleml/__init__.py b/doubleml/__init__.py index 69e064a00..c97bddf79 100644 --- a/doubleml/__init__.py +++ b/doubleml/__init__.py @@ -6,6 +6,7 @@ from .plm.pliv import DoubleMLPLIV from .irm.irm import DoubleMLIRM from .irm.apo import DoubleMLAPO +from .irm.apos import DoubleMLAPOS from .irm.iivm import DoubleMLIIVM from .double_ml_data import DoubleMLData, DoubleMLClusterData from .did.did import DoubleMLDID @@ -19,23 +20,26 @@ from .utils.blp import DoubleMLBLP from .utils.policytree import DoubleMLPolicyTree -__all__ = ['concat', - 'DoubleMLFramework', - 'DoubleMLPLR', - 'DoubleMLPLIV', - 'DoubleMLIRM', - 'DoubleMLAPO', - 'DoubleMLIIVM', - 'DoubleMLData', - 'DoubleMLClusterData', - 'DoubleMLDID', - 'DoubleMLDIDCS', - 'DoubleMLPQ', - 'DoubleMLQTE', - 'DoubleMLLPQ', - 'DoubleMLCVAR', - 'DoubleMLBLP', - 'DoubleMLPolicyTree', - 'DoubleMLSSM'] +__all__ = [ + 'concat', + 'DoubleMLFramework', + 'DoubleMLPLR', + 'DoubleMLPLIV', + 'DoubleMLIRM', + 'DoubleMLAPO', + 'DoubleMLAPOS', + 'DoubleMLIIVM', + 'DoubleMLData', + 'DoubleMLClusterData', + 'DoubleMLDID', + 'DoubleMLDIDCS', + 'DoubleMLPQ', + 'DoubleMLQTE', + 'DoubleMLLPQ', + 'DoubleMLCVAR', + 'DoubleMLBLP', + 'DoubleMLPolicyTree', + 'DoubleMLSSM' +] __version__ = importlib.metadata.version('doubleml') diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py new file mode 100644 index 000000000..24768bf9d --- /dev/null +++ b/doubleml/irm/apos.py @@ -0,0 +1,293 @@ +import 
numpy as np
+import pandas as pd
+
+from sklearn.base import clone
+
+from joblib import Parallel, delayed
+
+from ..double_ml_data import DoubleMLData, DoubleMLClusterData
+from .apo import DoubleMLAPO
+from ..double_ml_framework import concat
+
+from ..utils.resampling import DoubleMLResampling
+from ..utils._checks import _check_score, _check_trimming
+
+
+class DoubleMLAPOS:
+    """Double machine learning for interactive regression models with multiple discrete treatments.
+    """
+    def __init__(self,
+                 obj_dml_data,
+                 ml_g,
+                 ml_m,
+                 treatment_levels,
+                 n_folds=5,
+                 n_rep=1,
+                 score='APO',
+                 weights=None,
+                 normalize_ipw=False,
+                 trimming_rule='truncate',
+                 trimming_threshold=1e-2,
+                 draw_sample_splitting=True):
+
+        self._dml_data = obj_dml_data
+        self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData)
+        self._check_data(self._dml_data)
+
+        self._treatment_levels = np.asarray(treatment_levels).reshape((-1, ))
+        self._check_treatment_levels()
+        self._n_levels = len(self._treatment_levels)
+
+        self._normalize_ipw = normalize_ipw
+        self._n_folds = n_folds
+        self._n_rep = n_rep
+        # store the weights; they are passed on to the level-wise models
+        self._weights = weights
+
+        # check score
+        self._score = score
+        valid_scores = ['APO']
+        _check_score(self.score, valid_scores, allow_callable=False)
+
+        # initialize framework which is constructed after the fit method is called
+        self._framework = None
+
+        # initialize and check trimming
+        self._trimming_rule = trimming_rule
+        self._trimming_threshold = trimming_threshold
+        _check_trimming(self._trimming_rule, self._trimming_threshold)
+
+        if not isinstance(self.normalize_ipw, bool):
+            raise TypeError('Normalization indicator has to be boolean. ' +
+                            f'Object of type {str(type(self.normalize_ipw))} passed.')
+
+        # perform sample splitting
+        self._smpls = None
+        if draw_sample_splitting:
+            self.draw_sample_splitting()
+
+        self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)}
+        self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+
+        # initialize all models
+        self._modellist = self._initialize_models()
+
+    @property
+    def score(self):
+        """
+        The score function.
+        """
+        return self._score
+
+    @property
+    def n_levels(self):
+        """
+        The number of treatment levels.
+        """
+        return self._n_levels
+
+    @property
+    def normalize_ipw(self):
+        """
+        Indicates whether the inverse probability weights are normalized.
+        """
+        return self._normalize_ipw
+
+    @property
+    def trimming_rule(self):
+        """
+        Specifies the used trimming rule.
+        """
+        return self._trimming_rule
+
+    @property
+    def trimming_threshold(self):
+        """
+        Specifies the used trimming threshold.
+        """
+        return self._trimming_threshold
+
+    @property
+    def weights(self):
+        """
+        Specifies the weights for a weighted average potential outcome.
+        """
+        return self._weights
+
+    @property
+    def n_folds(self):
+        """
+        Number of folds.
+        """
+        return self._n_folds
+
+    @property
+    def n_rep(self):
+        """
+        Number of repetitions for the sample splitting.
+        """
+        return self._n_rep
+
+    @property
+    def coef(self):
+        """
+        Estimates for the causal parameter(s) after calling :meth:`fit` (shape (``n_levels``,)).
+        """
+        if self._framework is None:
+            coef = None
+        else:
+            coef = self.framework.thetas
+        return coef
+
+    @property
+    def framework(self):
+        """
+        The corresponding :class:`doubleml.DoubleMLFramework` object.
+        """
+        return self._framework
+
+    @property
+    def modellist(self):
+        """
+        The list of models for each level.
+        """
+        return self._modellist
+
+    def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None):
+        """
+        Estimate DoubleMLAPOS models.
+
+        Parameters
+        ----------
+        n_jobs_models : None or int
+            The number of CPUs to use to fit the treatment levels. ``None`` means ``1``.
+            Default is ``None``.
+
+        n_jobs_cv : None or int
+            The number of CPUs to use to fit the learners. ``None`` means ``1``.
+            Does not speed up computation for a single treatment level.
+            Default is ``None``.
+
+        store_predictions : bool
+            Indicates whether the predictions for the nuisance functions should be stored in ``predictions``.
+            Default is ``True``.
+
+        store_models : bool
+            Indicates whether the fitted models for the nuisance functions should be stored in ``models``. This allows
+            to analyze the fitted models or extract information like variable importance.
+            Default is ``False``.
+
+        Returns
+        -------
+        self : object
+        """
+
+        if external_predictions is not None:
+            raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.")
+
+        # parallel estimation of the models for the treatment levels
+        parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs')
+        fitted_models = parallel(delayed(self._fit_model)(i_level, n_jobs_cv, store_predictions, store_models)
+                                 for i_level in range(self.n_levels))
+
+        # combine the estimates and scores
+        framework_list = [None] * self.n_levels
+
+        for i_level in range(self.n_levels):
+            self._modellist[i_level] = fitted_models[i_level]
+            framework_list[i_level] = self._modellist[i_level].framework
+
+        # aggregate all frameworks
+        self._framework = concat(framework_list)
+
+        return self
+
+    def confint(self, joint=False, level=0.95):
+        """
+        Confidence intervals for DoubleML models.
+
+        Parameters
+        ----------
+        joint : bool
+            Indicates whether joint confidence intervals are computed.
+            Default is ``False``.
+
+        level : float
+            The confidence level.
+            Default is ``0.95``.
+
+        Returns
+        -------
+        df_ci : pd.DataFrame
+            A data frame with the confidence interval(s).
+        """
+
+        if self.framework is None:
+            raise ValueError('Apply fit() before confint().')
+
+        df_ci = self.framework.confint(joint=joint, level=level)
+        df_ci.set_index(pd.Index(self._treatment_levels), inplace=True)
+
+        return df_ci
+
+    def draw_sample_splitting(self):
+        """
+        Draw sample splitting for DoubleML models.
+
+        The samples are drawn according to the attributes
+        ``n_folds`` and ``n_rep``.
+ + Returns + ------- + self : object + """ + obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + stratify=self._dml_data.d) + self._smpls = obj_dml_resampling.split_samples() + + return self + + def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): + + model = self.modellist_0[i_level] + model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models) + return model + + def _check_treatment_levels(self): + if not np.all(np.isin(self._treatment_levels, np.unique(self._dml_data.d))): + raise ValueError('The treatment levels have to be a subset of the unique treatment levels in the data.') + + def _check_data(self, obj_dml_data): + if not isinstance(obj_dml_data, DoubleMLData): + raise TypeError('The data must be of DoubleMLData or DoubleMLClusterData type.') + if obj_dml_data.z is not None: + raise ValueError('The data must not contain instrumental variables.') + return + + def _initialize_models(self): + modellist = [None] * self.n_levels + kwargs = { + 'obj_dml_data': self._dml_data, + 'ml_g': self._learner['ml_g'], + 'ml_m': self._learner['ml_m'], + 'score': self.score, + 'n_folds': self.n_folds, + 'n_rep': self.n_rep, + 'weights': self.weights, + 'trimming_rule': self.trimming_rule, + 'trimming_threshold': self.trimming_threshold, + 'normalize_ipw': self.normalize_ipw, + 'draw_sample_splitting': False + } + for i_level in range(self.n_levels): + # initialize models for all levels + model = DoubleMLAPO( + treatment_level=self._treatment_levels[i_level], + **kwargs + ) + + # synchronize the sample splitting + model.set_sample_splitting(all_smpls=self.smpls) + modellist[i_level] = model + + return modellist diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py new file mode 100644 index 000000000..9081a4e4a --- /dev/null +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -0,0 +1,72 @@ +import pytest +import pandas as pd +import numpy as np + +from doubleml import DoubleMLAPOS, DoubleMLData +from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data + +from sklearn.linear_model import Lasso, LogisticRegression + +n = 100 +data = make_irm_data_discrete_treatments(n_obs=n) +df = pd.DataFrame( + np.column_stack((data['y'], data['d'], data['x'])), + columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])] +) + +dml_data = DoubleMLData(df, 'y', 'd') + +ml_g = Lasso() +ml_m = LogisticRegression() + + +@pytest.mark.ci +def test_apos_exception_data(): + msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.' + with pytest.raises(TypeError, match=msg): + _ = DoubleMLAPOS(pd.DataFrame(), ml_g, ml_m, treatment_levels=0) + + msg = 'The data must not contain instrumental variables.' + with pytest.raises(ValueError, match=msg): + dml_data_z = make_iivm_data() + _ = DoubleMLAPOS(dml_data_z, ml_g, ml_m, treatment_levels=0) + + msg = 'The treatment levels have to be a subset of the unique treatment levels in the data.' + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[1.1]) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=1.1) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[1, 2.2]) + + +@pytest.mark.ci +def test_apos_exception_scores(): + msg = 'Invalid score MAR. Valid score APO.' 
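+    # For contrast, a hypothetical valid call (using the module-level data and
+    # learners defined above) would use the only supported score:
+    # _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score='APO')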
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score='MAR') + + +@pytest.mark.ci +def test_apos_exception_trimming_rule(): + msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.' + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule='discard') + + # check the trimming_threshold exceptions + msg = "trimming_threshold has to be a float. Object of type passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, + trimming_rule='truncate', trimming_threshold="0.1") + + msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.' + with pytest.raises(ValueError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, + trimming_rule='truncate', trimming_threshold=0.6) + + +@pytest.mark.ci +def test_apos_exception_ipw_normalization(): + msg = "Normalization indicator has to be boolean. Object of type passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, normalize_ipw=1) From f849bd034fae2e0817e5369986c23dcf27639794 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Mon, 15 Jul 2024 20:31:24 +0200 Subject: [PATCH 30/98] update irm dgp --- doubleml/datasets.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index fd18affdc..b3f6d745d 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1435,7 +1435,7 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM raise ValueError('Invalid return_type.') -def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=None, **kwargs): +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): """ Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an underlying continous treatment). 
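     The discrete treatment is derived from an underlying continuous treatment:
     the continuous draws are cut into ``n_levels`` bins at their empirical
     quantiles, and each unit is untreated (``d = 0``) with baseline probability
     ``1/n_levels``; otherwise it is assigned its bin level (see the hunks below).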
@@ -1536,10 +1536,10 @@ def f_treatment(w, xi): res = xi * (-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3]) return res - def treatment_effect(d): - return 0.1 * np.exp(d) + 10 * np.sin(0.7 * d) + 2 * d - 0.2 * np.square(d) + def treatment_effect(d, scale=5): + return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 - z_tilde_1 = np.exp(0.5*x[:, 0]) + z_tilde_1 = np.exp(0.5 * x[:, 0]) z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3 z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2 @@ -1553,16 +1553,24 @@ def treatment_effect(d): var_eps_d = 1 eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - cont_d = f_treatment(z, xi) + eps_d + if linear: + g = f_reg(x) + m = f_treatment(x, xi) + else: + assert not linear + g = f_reg(z) + m = f_treatment(z, xi) + + cont_d = m + eps_d level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 eta = np.random.uniform(0, 1, size=n_obs) - observed_d = 1.0 * (eta >= 1/n_levels) * potential_level + d = 1.0 * (eta >= 1/n_levels) * potential_level ite = treatment_effect(cont_d) - y0 = f_reg(z) + eps_y + y0 = g + eps_y # only treated for d > 0 compared to the baseline - y = ite * (observed_d > 0) + y0 + y = ite * (d > 0) + y0 oracle_values = { 'cont_d': cont_d, @@ -1575,7 +1583,7 @@ def treatment_effect(d): resul_dict = { 'x': x, 'y': y, - 'd': observed_d, + 'd': d, 'oracle_values': oracle_values } From ec246689c1ba965ab0b09ba849a368c36d3e1025 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Mon, 15 Jul 2024 20:31:28 +0200 Subject: [PATCH 31/98] Update apo.py --- doubleml/irm/apo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 423f3fa7e..838e55460 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -133,7 +133,7 @@ def __init__(self, self._sensitivity_implemented = True self._external_predictions_implemented = True - # ATE weights are the standard case + # APO weights _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep) self._initialize_weights(weights) From 58f2b3962779cc0ff875fe5c4faf4b0b7b057e9e Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Mon, 15 Jul 2024 20:31:32 +0200 Subject: [PATCH 32/98] Update apos.py --- doubleml/irm/apos.py | 46 +++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 24768bf9d..fb4b7aeeb 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -10,7 +10,7 @@ from ..double_ml_framework import concat from ..utils.resampling import DoubleMLResampling -from ..utils._checks import _check_score, _check_trimming +from ..utils._checks import _check_score, _check_trimming, _check_weights class DoubleMLAPOS: @@ -22,7 +22,7 @@ def __init__(self, ml_m, treatment_levels, n_folds=5, - n_rep=1, + n_rep=1, score='APO', weights=None, normalize_ipw=False, @@ -36,7 +36,7 @@ def __init__(self, self._treatment_levels = np.asarray(treatment_levels).reshape((-1, )) self._check_treatment_levels() - self._n_levels = len(self._treatment_levels) + self._n_treatment_levels = len(self._treatment_levels) self._normalize_ipw = normalize_ipw self._n_folds = n_folds @@ -67,6 +67,10 @@ def __init__(self, self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)} self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'} + # APO weights + _check_weights(weights, score="ATE", 
n_obs=obj_dml_data.n_obs, n_rep=self.n_rep) + self._initialize_weights(weights) + # initialize all models self._modellist = self._initialize_models() @@ -78,11 +82,11 @@ def score(self): return self._score @property - def n_levels(self): + def n_treatment_levels(self): """ The number of treatment levels. """ - return self._n_levels + return self._n_treatment_levels @property def normalize_ipw(self): @@ -137,6 +141,17 @@ def coef(self): coef = self.framework.thetas return coef + @property + def smpls(self): + """ + The partition used for cross-fitting. + """ + if self._smpls is None: + err_msg = ('Sample splitting not specified. Draw samples via .draw_sample splitting(). ' + + 'External samples not implemented yet.') + raise ValueError(err_msg) + return self._smpls + @property def framework(self): """ @@ -189,10 +204,10 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ for i_level in range(self.n_treatment_levels)) # combine the estimates and scores - framework_list = [None] * self.n_levels + framework_list = [None] * self.n_treatment_levels - for i_level in range(self.n_levels): - self._modellist[i_level] = fitted_models[i_level][0] + for i_level in range(self.n_treatment_levels): + self._modellist[i_level] = fitted_models[i_level] framework_list[i_level] = self._modellist[i_level].framework # aggregate all frameworks @@ -249,7 +264,7 @@ def draw_sample_splitting(self): def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): - model = self.modellist_0[i_level] + model = self.modellist[i_level] model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models) return model @@ -264,8 +279,17 @@ def _check_data(self, obj_dml_data): raise ValueError('The data must not contain instrumental variables.') return + def _initialize_weights(self, weights): + if weights is None: + weights = np.ones(self._dml_data.n_obs) + if isinstance(weights, np.ndarray): + self._weights = weights + else: + assert isinstance(weights, dict) + self._weights = weights + def _initialize_models(self): - modellist = [None] * self.n_levels + modellist = [None] * self.n_treatment_levels kwargs = { 'obj_dml_data': self._dml_data, 'ml_g': self._learner['ml_g'], @@ -279,7 +303,7 @@ def _initialize_models(self): 'normalize_ipw': self.normalize_ipw, 'draw_sample_splitting': False } - for i_level in range(self.n_levels): + for i_level in range(self.n_treatment_levels): # initialize models for all levels model = DoubleMLAPO( treatment_level=self._treatment_levels[i_level], From 9ab05aa8ea92df2ff1d65c4bff532606f8431336 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 20:04:31 +0200 Subject: [PATCH 33/98] update set sample splitting documentation --- doubleml/double_ml.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 46e18825f..a48dbbc13 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1188,9 +1188,6 @@ def set_sample_splitting(self, all_smpls): >>> ml_m = learner >>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5) >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m) - >>> # simple sample splitting with two folds and without cross-fitting - >>> smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) - >>> dml_plr_obj.set_sample_splitting(smpls) >>> # sample splitting with two folds and cross-fitting >>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), >>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])] From b3f4f77d86b8fcc28f1e02b496ad0517c069f7d1 Mon Sep 17 
00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 20:47:15 +0200 Subject: [PATCH 34/98] add set_sample_slit to apos.py --- doubleml/irm/apos.py | 164 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 4 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index fb4b7aeeb..d291423dc 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -10,7 +10,8 @@ from ..double_ml_framework import concat from ..utils.resampling import DoubleMLResampling -from ..utils._checks import _check_score, _check_trimming, _check_weights +from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_is_partition, \ + _check_smpl_split_tpl, _check_smpl_split, _check_all_smpls class DoubleMLAPOS: @@ -64,6 +65,9 @@ def __init__(self, if draw_sample_splitting: self.draw_sample_splitting() + # initialize all models if splits are known + self._modellist = self._initialize_models() + self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)} self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'} @@ -71,8 +75,6 @@ def __init__(self, _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep) self._initialize_weights(weights) - # initialize all models - self._modellist = self._initialize_models() @property def score(self): @@ -133,7 +135,7 @@ def n_rep(self): @property def coef(self): """ - Estimates for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)). + Estimates for the causal parameter(s) after calling :meth:`fit` (shape (``n_treatment_levels``,)). """ if self._framework is None: coef = None @@ -141,6 +143,41 @@ def coef(self): coef = self.framework.thetas return coef + @property + def all_coef(self): + """ + Estimates of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit` + (shape (``n_treatment_levels``, ``n_rep``)). + """ + if self._framework is None: + all_coef = None + else: + all_coef = self.framework.all_thetas + return all_coef + + @property + def se(self): + """ + Standard errors for the causal parameter(s) after calling :meth:`fit` (shape (``n_treatment_levels``,)). + """ + if self._framework is None: + se = None + else: + se = self.framework.ses + return se + + @property + def all_se(self): + """ + Standard errors of the causal parameter(s) for the ``n_rep`` different sample splits after calling :meth:`fit` + (shape (``n_treatment_levels``, ``n_rep``)). + """ + if self._framework is None: + all_se = None + else: + all_se = self.framework.all_ses + return all_se + @property def smpls(self): """ @@ -262,6 +299,125 @@ def draw_sample_splitting(self): return self + def set_sample_splitting(self, all_smpls): + """ + Set the sample splitting for DoubleML models. + + The attributes ``n_folds`` and ``n_rep`` are derived from the provided partition. + + Parameters + ---------- + all_smpls : list or tuple + If nested list of lists of tuples: + The outer list needs to provide an entry per repeated sample splitting (length of list is set as + ``n_rep``). + The inner list needs to provide a tuple (train_ind, test_ind) per fold (length of list is set as + ``n_folds``). test_ind must form a partition for each inner list. + If list of tuples: + The list needs to provide a tuple (train_ind, test_ind) per fold (length of list is set as + ``n_folds``). test_ind must form a partition. ``n_rep=1`` is always set. + If tuple: + Must be a tuple with two elements train_ind and test_ind. 
Only viable option is to set + train_ind and test_ind to np.arange(n_obs), which corresponds to no sample splitting. + ``n_folds=1`` and ``n_rep=1`` is always set. + + Returns + ------- + self : object + + Examples + -------- + >>> import numpy as np + >>> import doubleml as dml + >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from sklearn.ensemble import RandomForestRegressor + >>> from sklearn.base import clone + >>> np.random.seed(3141) + >>> learner = RandomForestRegressor(max_depth=2, n_estimators=10) + >>> ml_g = learner + >>> ml_m = learner + >>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5) + >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m) + >>> # sample splitting with two folds and cross-fitting + >>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), + >>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])] + >>> dml_plr_obj.set_sample_splitting(smpls) + >>> # sample splitting with two folds and repeated cross-fitting with n_rep = 2 + >>> smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), + >>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])], + >>> [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), + >>> ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]] + >>> dml_plr_obj.set_sample_splitting(smpls) + """ + if self._is_cluster_data: + raise NotImplementedError('Externally setting the sample splitting for DoubleML is ' + 'not yet implemented with clustering.') + if isinstance(all_smpls, tuple): + if not len(all_smpls) == 2: + raise ValueError('Invalid partition provided. ' + 'Tuple for train_ind and test_ind must consist of exactly two elements.') + all_smpls = _check_smpl_split_tpl(all_smpls, self._dml_data.n_obs) + if (_check_is_partition([all_smpls], self._dml_data.n_obs) & + _check_is_partition([(all_smpls[1], all_smpls[0])], self._dml_data.n_obs)): + self._n_rep = 1 + self._n_folds = 1 + self._smpls = [[all_smpls]] + else: + raise ValueError('Invalid partition provided. ' + 'Tuple provided that doesn\'t form a partition.') + else: + if not isinstance(all_smpls, list): + raise TypeError('all_smpls must be of list or tuple type. ' + f'{str(all_smpls)} of type {str(type(all_smpls))} was passed.') + all_tuple = all([isinstance(tpl, tuple) for tpl in all_smpls]) + if all_tuple: + if not all([len(tpl) == 2 for tpl in all_smpls]): + raise ValueError('Invalid partition provided. ' + 'All tuples for train_ind and test_ind must consist of exactly two elements.') + self._n_rep = 1 + all_smpls = _check_smpl_split(all_smpls, self._dml_data.n_obs) + if _check_is_partition(all_smpls, self._dml_data.n_obs): + if ((len(all_smpls) == 1) & + _check_is_partition([(all_smpls[0][1], all_smpls[0][0])], self._dml_data.n_obs)): + self._n_folds = 1 + self._smpls = [all_smpls] + else: + self._n_folds = len(all_smpls) + self._smpls = _check_all_smpls([all_smpls], self._dml_data.n_obs, check_intersect=True) + else: + raise ValueError('Invalid partition provided. ' + 'Tuples provided that don\'t form a partition.') + else: + all_list = all([isinstance(smpl, list) for smpl in all_smpls]) + if not all_list: + raise ValueError('Invalid partition provided. ' + 'all_smpls is a list where neither all elements are tuples ' + 'nor all elements are lists.') + all_tuple = all([all([isinstance(tpl, tuple) for tpl in smpl]) for smpl in all_smpls]) + if not all_tuple: + raise TypeError('For repeated sample splitting all_smpls must be list of lists of tuples.') + all_pairs = all([all([len(tpl) == 2 for tpl in smpl]) for smpl in all_smpls]) + if not all_pairs: + raise ValueError('Invalid partition provided. 
' + 'All tuples for train_ind and test_ind must consist of exactly two elements.') + n_folds_each_smpl = np.array([len(smpl) for smpl in all_smpls]) + if not np.all(n_folds_each_smpl == n_folds_each_smpl[0]): + raise ValueError('Invalid partition provided. ' + 'Different number of folds for repeated sample splitting.') + all_smpls = _check_all_smpls(all_smpls, self._dml_data.n_obs) + smpls_are_partitions = [_check_is_partition(smpl, self._dml_data.n_obs) for smpl in all_smpls] + + if all(smpls_are_partitions): + self._n_rep = len(all_smpls) + self._n_folds = int(n_folds_each_smpl[0]) + self._smpls = _check_all_smpls(all_smpls, self._dml_data.n_obs, check_intersect=True) + else: + raise ValueError('Invalid partition provided. ' + 'At least one inner list does not form a partition.') + self._modellist = self._initialize_models() + + return self + def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): model = self.modellist[i_level] From d2ab51244fc07006c5bbb81547e5049621e8f251 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 20:47:29 +0200 Subject: [PATCH 35/98] create manual apos version and basic unit test --- doubleml/irm/tests/_utils_apos_manual.py | 59 ++++++++++++ doubleml/irm/tests/test_apos.py | 113 +++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 doubleml/irm/tests/_utils_apos_manual.py create mode 100644 doubleml/irm/tests/test_apos.py diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py new file mode 100644 index 000000000..9356ec815 --- /dev/null +++ b/doubleml/irm/tests/_utils_apos_manual.py @@ -0,0 +1,59 @@ +import numpy as np +from sklearn.base import clone + +from ..apo import DoubleMLAPO +from ...double_ml_data import DoubleMLData + + +def fit_apos(y, x, d, + learner_g, learner_m, treatment_levels, all_smpls, score, + n_rep=1, trimming_rule='truncate', + normalize_ipw=False, trimming_threshold=1e-2): + n_obs = len(y) + n_treatments = len(treatment_levels) + n_folds = len(all_smpls[0]) + + dml_data = DoubleMLData.from_arrays(x, y, d) + + all_apos = np.zeros((n_treatments, n_rep)) + all_se = np.zeros((n_treatments, n_rep)) + apo_scaled_score = np.zeros((n_obs, n_treatments, n_rep)) + + for i_level in range(n_treatments): + model_APO = DoubleMLAPO( + dml_data, + clone(learner_g), + clone(learner_m), + treatment_level=treatment_levels[i_level], + n_folds=n_folds, + n_rep=n_rep, + score=score, + trimming_rule=trimming_rule, + trimming_threshold=trimming_threshold, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False + ) + + # synchronize the sample splitting + model_APO.set_sample_splitting(all_smpls) + model_APO.fit() + + all_apos[i_level, :] = model_APO.all_coef + all_se[i_level, :] = model_APO.all_se + + for i_rep in range(n_rep): + J = model_APO.psi_deriv[:, i_rep, 0].mean() + apo_psi = model_APO.psi[:, i_rep, 0] + + apo_scaled_score[:, i_level, i_rep] = apo_psi / J + + apos = np.median(all_apos, axis=1) + se = np.zeros(n_treatments) + for i_level in range(n_treatments): + se[i_level] = np.sqrt(np.median(np.power(all_se[i_level, :], 2) * n_obs + + np.power(all_apos[i_level, :] - all_apos[i_level], 2)) / n_obs) + + res = {'apos': apos, 'se': se, + 'all_apos': all_apos, 'all_se': all_se, + 'apo_scaled_score': apo_scaled_score} + return res diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py new file mode 100644 index 000000000..1881b39da --- /dev/null +++ b/doubleml/irm/tests/test_apos.py @@ -0,0 +1,113 @@ +import numpy as 
np +import pandas as pd +import pytest + +from sklearn.base import clone + +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +import doubleml as dml +from doubleml.datasets import make_irm_data_discrete_treatments + +from ...tests._utils import draw_smpls +from ._utils_apos_manual import fit_apos + + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250, random_state=42)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=[1]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module', + params=[[0, 1, 2], [0]]) +def treatment_levels(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 499 + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + n_obs = 500 + data = make_irm_data_discrete_treatments(n_obs=n_obs) + y = data['y'] + x = data['x'] + d = data['d'] + df = pd.DataFrame( + np.column_stack((y, d, x)), + columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])] + ) + + dml_data = dml.DoubleMLData(df, 'y', 'd') + all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) + + np.random.seed(3141) + dml_obj = dml.DoubleMLAPOS( + dml_data, + ml_g, ml_m, + treatment_levels=treatment_levels, + n_folds=n_folds, + n_rep=n_rep, + score='APO', + normalize_ipw=normalize_ipw, + trimming_rule='truncate', + trimming_threshold=trimming_threshold, + draw_sample_splitting=False) + + # synchronize the sample splitting + dml_obj.set_sample_splitting(all_smpls) + dml_obj.fit() + + np.random.seed(3141) + res_manual = fit_apos( + y, x, d, + clone(learner[0]), clone(learner[1]), + treatment_levels=treatment_levels, + all_smpls=all_smpls, + score='APO', + trimming_rule='truncate', + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold) + + res_dict = {'coef': dml_obj.coef, + 'coef_manual': res_manual['apos'], + 'se': dml_obj.se, + 'se_manual': res_manual['se']} + return res_dict + + +@pytest.mark.ci +def test_dml_apos_coef(dml_apos_fixture): + assert np.allclose(dml_apos_fixture['coef'], + dml_apos_fixture['coef_manual'], + rtol=1e-9, atol=1e-9) From e6d680cde78deb304d18c1e85c0c3453c3608591 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 20:52:03 +0200 Subject: [PATCH 36/98] Update _utils_apos_manual.py --- doubleml/irm/tests/_utils_apos_manual.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py index 9356ec815..db5f41c55 100644 --- a/doubleml/irm/tests/_utils_apos_manual.py +++ b/doubleml/irm/tests/_utils_apos_manual.py @@ -47,11 +47,11 @@ def fit_apos(y, x, d, apo_scaled_score[:, i_level, i_rep] = apo_psi / J - apos = np.median(all_apos, axis=1) - se = np.zeros(n_treatments) - for i_level in range(n_treatments): - 
se[i_level] = np.sqrt(np.median(np.power(all_se[i_level, :], 2) * n_obs + - np.power(all_apos[i_level, :] - all_apos[i_level], 2)) / n_obs) + apos = np.median(all_apos, axis=1) + se = np.zeros(n_treatments) + for i_level in range(n_treatments): + se[i_level] = np.sqrt(np.median(np.power(all_se[i_level, :], 2) * n_obs + + np.power(all_apos[i_level, :] - all_apos[i_level], 2)) / n_obs) res = {'apos': apos, 'se': se, 'all_apos': all_apos, 'all_se': all_se, From 2adc6d40157c662ce0f89bd641a71f43853e8bbe Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:42:11 +0200 Subject: [PATCH 37/98] update set_sample_splitting in apos.py --- doubleml/irm/apos.py | 91 ++++++--------------------------- doubleml/irm/tests/test_apos.py | 43 +++++++++------- 2 files changed, 40 insertions(+), 94 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index d291423dc..735fda63b 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -10,8 +10,7 @@ from ..double_ml_framework import concat from ..utils.resampling import DoubleMLResampling -from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_is_partition, \ - _check_smpl_split_tpl, _check_smpl_split, _check_all_smpls +from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_sample_splitting class DoubleMLAPOS: @@ -60,14 +59,6 @@ def __init__(self, raise TypeError('Normalization indicator has to be boolean. ' + f'Object of type {str(type(self.normalize_ipw))} passed.') - # perform sample splitting - self._smpls = None - if draw_sample_splitting: - self.draw_sample_splitting() - - # initialize all models if splits are known - self._modellist = self._initialize_models() - self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)} self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'} @@ -75,6 +66,13 @@ def __init__(self, _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep) self._initialize_weights(weights) + # perform sample splitting + self._smpls = None + if draw_sample_splitting: + self.draw_sample_splitting() + + # initialize all models if splits are known + self._modellist = self._initialize_models() @property def score(self): @@ -227,6 +225,9 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ to analyze the fitted models or extract information like variable importance. Default is ``False``. + external_predictions : None + Not implemented for DoubleMLAPOS. + Returns ------- self : object @@ -299,7 +300,7 @@ def draw_sample_splitting(self): return self - def set_sample_splitting(self, all_smpls): + def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): """ Set the sample splitting for DoubleML models. @@ -349,71 +350,9 @@ def set_sample_splitting(self, all_smpls): >>> ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]] >>> dml_plr_obj.set_sample_splitting(smpls) """ - if self._is_cluster_data: - raise NotImplementedError('Externally setting the sample splitting for DoubleML is ' - 'not yet implemented with clustering.') - if isinstance(all_smpls, tuple): - if not len(all_smpls) == 2: - raise ValueError('Invalid partition provided. 
' - 'Tuple for train_ind and test_ind must consist of exactly two elements.') - all_smpls = _check_smpl_split_tpl(all_smpls, self._dml_data.n_obs) - if (_check_is_partition([all_smpls], self._dml_data.n_obs) & - _check_is_partition([(all_smpls[1], all_smpls[0])], self._dml_data.n_obs)): - self._n_rep = 1 - self._n_folds = 1 - self._smpls = [[all_smpls]] - else: - raise ValueError('Invalid partition provided. ' - 'Tuple provided that doesn\'t form a partition.') - else: - if not isinstance(all_smpls, list): - raise TypeError('all_smpls must be of list or tuple type. ' - f'{str(all_smpls)} of type {str(type(all_smpls))} was passed.') - all_tuple = all([isinstance(tpl, tuple) for tpl in all_smpls]) - if all_tuple: - if not all([len(tpl) == 2 for tpl in all_smpls]): - raise ValueError('Invalid partition provided. ' - 'All tuples for train_ind and test_ind must consist of exactly two elements.') - self._n_rep = 1 - all_smpls = _check_smpl_split(all_smpls, self._dml_data.n_obs) - if _check_is_partition(all_smpls, self._dml_data.n_obs): - if ((len(all_smpls) == 1) & - _check_is_partition([(all_smpls[0][1], all_smpls[0][0])], self._dml_data.n_obs)): - self._n_folds = 1 - self._smpls = [all_smpls] - else: - self._n_folds = len(all_smpls) - self._smpls = _check_all_smpls([all_smpls], self._dml_data.n_obs, check_intersect=True) - else: - raise ValueError('Invalid partition provided. ' - 'Tuples provided that don\'t form a partition.') - else: - all_list = all([isinstance(smpl, list) for smpl in all_smpls]) - if not all_list: - raise ValueError('Invalid partition provided. ' - 'all_smpls is a list where neither all elements are tuples ' - 'nor all elements are lists.') - all_tuple = all([all([isinstance(tpl, tuple) for tpl in smpl]) for smpl in all_smpls]) - if not all_tuple: - raise TypeError('For repeated sample splitting all_smpls must be list of lists of tuples.') - all_pairs = all([all([len(tpl) == 2 for tpl in smpl]) for smpl in all_smpls]) - if not all_pairs: - raise ValueError('Invalid partition provided. ' - 'All tuples for train_ind and test_ind must consist of exactly two elements.') - n_folds_each_smpl = np.array([len(smpl) for smpl in all_smpls]) - if not np.all(n_folds_each_smpl == n_folds_each_smpl[0]): - raise ValueError('Invalid partition provided. ' - 'Different number of folds for repeated sample splitting.') - all_smpls = _check_all_smpls(all_smpls, self._dml_data.n_obs) - smpls_are_partitions = [_check_is_partition(smpl, self._dml_data.n_obs) for smpl in all_smpls] - - if all(smpls_are_partitions): - self._n_rep = len(all_smpls) - self._n_folds = int(n_folds_each_smpl[0]) - self._smpls = _check_all_smpls(all_smpls, self._dml_data.n_obs, check_intersect=True) - else: - raise ValueError('Invalid partition provided. 
' - 'At least one inner list does not form a partition.') + self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting( + all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data) + self._modellist = self._initialize_models() return self diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 1881b39da..55d5252e0 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -10,7 +10,6 @@ import doubleml as dml from doubleml.datasets import make_irm_data_discrete_treatments -from ...tests._utils import draw_smpls from ._utils_apos_manual import fit_apos @@ -69,26 +68,29 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ ) dml_data = dml.DoubleMLData(df, 'y', 'd') - all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) - np.random.seed(3141) - dml_obj = dml.DoubleMLAPOS( - dml_data, - ml_g, ml_m, - treatment_levels=treatment_levels, - n_folds=n_folds, - n_rep=n_rep, - score='APO', - normalize_ipw=normalize_ipw, - trimming_rule='truncate', - trimming_threshold=trimming_threshold, - draw_sample_splitting=False) - - # synchronize the sample splitting - dml_obj.set_sample_splitting(all_smpls) + input_args = { + "treatment_levels": treatment_levels, + "n_folds": n_folds, + "n_rep": n_rep, + "score": 'APO', + "normalize_ipw": normalize_ipw, + "trimming_rule": 'truncate', + "trimming_threshold": trimming_threshold, + } + + np.random.seed(42) + dml_obj = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args) dml_obj.fit() + # get the sample splitting + all_smpls = dml_obj.smpls - np.random.seed(3141) + np.random.seed(42) + dml_obj_ext_smpls = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args, draw_sample_splitting=False) + dml_obj_ext_smpls.set_sample_splitting(dml_obj.smpls) + dml_obj_ext_smpls.fit() + + np.random.seed(42) res_manual = fit_apos( y, x, d, clone(learner[0]), clone(learner[1]), @@ -100,8 +102,10 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ trimming_threshold=trimming_threshold) res_dict = {'coef': dml_obj.coef, + 'coef_ext_smpls': dml_obj_ext_smpls.coef, 'coef_manual': res_manual['apos'], 'se': dml_obj.se, + 'se_ext_smpls': dml_obj_ext_smpls.se, 'se_manual': res_manual['se']} return res_dict @@ -111,3 +115,6 @@ def test_dml_apos_coef(dml_apos_fixture): assert np.allclose(dml_apos_fixture['coef'], dml_apos_fixture['coef_manual'], rtol=1e-9, atol=1e-9) + assert np.allclose(dml_apos_fixture['coef'], + dml_apos_fixture['coef_ext_smpls'], + rtol=1e-9, atol=1e-9) From 29f67c4297067659eee3ee58cf230af7dfbd978e Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:54:27 +0200 Subject: [PATCH 38/98] create manual confint version for qte and apos --- doubleml/irm/tests/_utils_qte_manual.py | 20 -------------------- doubleml/irm/tests/test_qte.py | 12 ++++++------ doubleml/tests/_utils.py | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/doubleml/irm/tests/_utils_qte_manual.py b/doubleml/irm/tests/_utils_qte_manual.py index ccaf96ee0..5c177907c 100644 --- a/doubleml/irm/tests/_utils_qte_manual.py +++ b/doubleml/irm/tests/_utils_qte_manual.py @@ -1,7 +1,5 @@ import numpy as np from sklearn.base import clone -import pandas as pd -from scipy.stats import norm from ..pq import DoubleMLPQ from ...double_ml_data import DoubleMLData @@ -99,21 +97,3 @@ def boot_qte(scaled_scores, ses, quantiles, all_smpls, n_rep, bootstrap, n_rep_b 
(n_obs * ses[i_quant, i_rep]) return boot_t_stat - - -def confint_qte(coef, se, quantiles, boot_t_stat=None, joint=True, level=0.95): - a = (1 - level) - ab = np.array([a / 2, 1. - a / 2]) - if joint: - assert boot_t_stat.shape[2] == 1 - sim = np.amax(np.abs(boot_t_stat[:, :, 0]), 1) - hatc = np.quantile(sim, 1 - a) - ci = np.vstack((coef - se * hatc, coef + se * hatc)).T - else: - fac = norm.ppf(ab) - ci = np.vstack((coef + se * fac[0], coef + se * fac[1])).T - - df_ci = pd.DataFrame(ci, - columns=['{:.1f} %'.format(i * 100) for i in ab], - index=quantiles) - return df_ci diff --git a/doubleml/irm/tests/test_qte.py b/doubleml/irm/tests/test_qte.py index bdcd695da..636a59fac 100644 --- a/doubleml/irm/tests/test_qte.py +++ b/doubleml/irm/tests/test_qte.py @@ -9,8 +9,8 @@ from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier -from ...tests._utils import draw_smpls -from ._utils_qte_manual import fit_qte, boot_qte, confint_qte +from ...tests._utils import draw_smpls, confint_manual +from ._utils_qte_manual import fit_qte, boot_qte from doubleml.datasets import make_irm_data from ...utils._estimation import _default_kde @@ -94,8 +94,8 @@ def dml_qte_fixture(generate_data_quantiles, learner, normalize_ipw, kde): draw_sample_splitting=True) ci = dml_qte_obj.confint(joint=False, level=0.95) - ci_manual = confint_qte(res_manual['qte'], res_manual['se'], quantiles, - boot_t_stat=None, joint=False, level=0.95) + ci_manual = confint_manual(res_manual['qte'], res_manual['se'], quantiles, + boot_t_stat=None, joint=False, level=0.95) res_dict = {'coef': dml_qte_obj.coef, 'coef_manual': res_manual['qte'], 'coef_ext_smpls': dml_qte_obj_ext_smpls.coef, @@ -120,8 +120,8 @@ def dml_qte_fixture(generate_data_quantiles, learner, normalize_ipw, kde): res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat ci = dml_qte_obj.confint(joint=True, level=0.95) - ci_manual = confint_qte(res_manual['qte'], res_manual['se'], quantiles, - boot_t_stat=boot_t_stat, joint=True, level=0.95) + ci_manual = confint_manual(res_manual['qte'], res_manual['se'], quantiles, + boot_t_stat=boot_t_stat, joint=True, level=0.95) res_dict['boot_ci_' + bootstrap] = ci.to_numpy() res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() return res_dict diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index b6c8fbc28..fb85b2410 100644 --- a/doubleml/tests/_utils.py +++ b/doubleml/tests/_utils.py @@ -1,6 +1,8 @@ import numpy as np from sklearn.model_selection import KFold, GridSearchCV, StratifiedKFold from sklearn.base import clone +import pandas as pd +from scipy.stats import norm from ..utils._estimation import _var_est, _aggregate_coefs_and_ses @@ -111,3 +113,21 @@ def generate_dml_dict(psi_a, psi_b): } return doubleml_dict + + +def confint_manual(coef, se, index_names, boot_t_stat=None, joint=True, level=0.95): + a = (1 - level) + ab = np.array([a / 2, 1. 
- a / 2]) + if joint: + assert boot_t_stat.shape[2] == 1 + sim = np.amax(np.abs(boot_t_stat[:, :, 0]), 1) + hatc = np.quantile(sim, 1 - a) + ci = np.vstack((coef - se * hatc, coef + se * hatc)).T + else: + fac = norm.ppf(ab) + ci = np.vstack((coef + se * fac[0], coef + se * fac[1])).T + + df_ci = pd.DataFrame(ci, + columns=['{:.1f} %'.format(i * 100) for i in ab], + index=index_names) + return df_ci From d488dd571ce7c021732496c763f164e3be937010 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:09:41 +0200 Subject: [PATCH 39/98] add boostrap() to apos --- doubleml/irm/apos.py | 57 +++++++++++++++++++++ doubleml/irm/tests/_utils_apos_manual.py | 15 ++++++ doubleml/irm/tests/test_apos.py | 65 +++++++++++++++++++++--- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 735fda63b..c4dbd787a 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -130,6 +130,28 @@ def n_rep(self): """ return self._n_rep + @property + def n_rep_boot(self): + """ + The number of bootstrap replications. + """ + if self._framework is None: + n_rep_boot = None + else: + n_rep_boot = self._framework.n_rep_boot + return n_rep_boot + + @property + def boot_method(self): + """ + The method to construct the bootstrap replications. + """ + if self._framework is None: + method = None + else: + method = self._framework.boot_method + return method + @property def coef(self): """ @@ -194,6 +216,18 @@ def framework(self): """ return self._framework + @property + def boot_t_stat(self): + """ + Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap` + (shape (``n_rep_boot``, ``n_quantiles``, ``n_rep``)). + """ + if self._framework is None: + boot_t_stat = None + else: + boot_t_stat = self._framework.boot_t_stat + return boot_t_stat + @property def modellist(self): """ @@ -281,6 +315,29 @@ def confint(self, joint=False, level=0.95): return df_ci + def bootstrap(self, method='normal', n_rep_boot=500): + """ + Multiplier bootstrap for DoubleML models. + + Parameters + ---------- + method : str + A str (``'Bayes'``, ``'normal'`` or ``'wild'``) specifying the multiplier bootstrap method. + Default is ``'normal'`` + + n_rep_boot : int + The number of bootstrap replications. + + Returns + ------- + self : object + """ + if self._framework is None: + raise ValueError('Apply fit() before bootstrap().') + self._framework.bootstrap(method=method, n_rep_boot=n_rep_boot) + + return self + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. 
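A minimal, hedged usage sketch of the bootstrap API added above (the seed,
sample size, learners and treatment levels are illustrative assumptions, not
part of the patch):

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression, LogisticRegression
    import doubleml as dml
    from doubleml.datasets import make_irm_data_discrete_treatments

    np.random.seed(42)
    data = make_irm_data_discrete_treatments(n_obs=500)
    df = pd.DataFrame(
        np.column_stack((data['y'], data['d'], data['x'])),
        columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
    )
    dml_data = dml.DoubleMLData(df, 'y', 'd')

    dml_obj = dml.DoubleMLAPOS(dml_data, LinearRegression(), LogisticRegression(),
                               treatment_levels=[0, 1, 2])
    dml_obj.fit()
    # multiplier bootstrap feeds the joint confidence intervals
    dml_obj.bootstrap(method='normal', n_rep_boot=499)
    print(dml_obj.confint(joint=True, level=0.95))

Since ``bootstrap()`` delegates to the aggregated framework object, the joint
intervals across all treatment levels are based on the bootstrap draws stored
there.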
diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py index db5f41c55..cf47d6450 100644 --- a/doubleml/irm/tests/_utils_apos_manual.py +++ b/doubleml/irm/tests/_utils_apos_manual.py @@ -4,6 +4,8 @@ from ..apo import DoubleMLAPO from ...double_ml_data import DoubleMLData +from ...tests._utils_boot import draw_weights + def fit_apos(y, x, d, learner_g, learner_m, treatment_levels, all_smpls, score, @@ -57,3 +59,16 @@ def fit_apos(y, x, d, 'all_apos': all_apos, 'all_se': all_se, 'apo_scaled_score': apo_scaled_score} return res + + +def boot_apos(scaled_scores, ses, treatment_levels, all_smpls, n_rep, bootstrap, n_rep_boot): + n_treatment_levels = len(treatment_levels) + boot_t_stat = np.zeros((n_rep_boot, n_treatment_levels, n_rep)) + for i_rep in range(n_rep): + n_obs = scaled_scores.shape[0] + weights = draw_weights(bootstrap, n_rep_boot, n_obs) + for i_treatment_levels in range(n_treatment_levels): + boot_t_stat[:, i_treatment_levels, i_rep] = np.matmul(weights, scaled_scores[:, i_treatment_levels, i_rep]) / \ + (n_obs * ses[i_treatment_levels, i_rep]) + + return boot_t_stat diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 55d5252e0..6c39a9678 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -10,7 +10,8 @@ import doubleml as dml from doubleml.datasets import make_irm_data_discrete_treatments -from ._utils_apos_manual import fit_apos +from ._utils_apos_manual import fit_apos, boot_apos +from ...tests._utils import confint_manual @pytest.fixture(scope='module', @@ -101,12 +102,44 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) - res_dict = {'coef': dml_obj.coef, - 'coef_ext_smpls': dml_obj_ext_smpls.coef, - 'coef_manual': res_manual['apos'], - 'se': dml_obj.se, - 'se_ext_smpls': dml_obj_ext_smpls.se, - 'se_manual': res_manual['se']} + ci = dml_obj.confint(joint=False, level=0.95) + ci_ext_smpls = dml_obj_ext_smpls.confint(joint=False, level=0.95) + ci_manual = confint_manual( + res_manual['apos'], res_manual['se'], treatment_levels, + boot_t_stat=None, joint=False, level=0.95 + ) + + res_dict = { + 'coef': dml_obj.coef, + 'coef_ext_smpls': dml_obj_ext_smpls.coef, + 'coef_manual': res_manual['apos'], + 'se': dml_obj.se, + 'se_ext_smpls': dml_obj_ext_smpls.se, + 'se_manual': res_manual['se'], + 'boot_methods': boot_methods, + 'ci': ci.to_numpy(), + 'ci_ext_smpls': ci_ext_smpls.to_numpy(), + 'ci_manual': ci_manual.to_numpy(), + 'apo_model': dml_obj + } + + for bootstrap in boot_methods: + np.random.seed(42) + boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels, + all_smpls, n_rep, bootstrap, n_rep_boot) + + np.random.seed(42) + dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat + + ci = dml_obj.confint(joint=True, level=0.95) + ci_manual = confint_manual(res_manual['apos'], res_manual['se'], treatment_levels, + boot_t_stat=boot_t_stat, joint=True, level=0.95) + res_dict['boot_ci_' + bootstrap] = ci.to_numpy() + res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() + return res_dict @@ -118,3 +151,21 @@ def test_dml_apos_coef(dml_apos_fixture): assert np.allclose(dml_apos_fixture['coef'], dml_apos_fixture['coef_ext_smpls'], rtol=1e-9, atol=1e-9) + + +@pytest.mark.ci +def 
test_dml_apos_se(dml_apos_fixture): + assert np.allclose(dml_apos_fixture['se'], + dml_apos_fixture['se_manual'], + rtol=1e-9, atol=1e-9) + assert np.allclose(dml_apos_fixture['se'], + dml_apos_fixture['se_ext_smpls'], + rtol=1e-9, atol=1e-9) + + +@pytest.mark.ci +def test_dml_apos_boot(dml_apos_fixture): + for bootstrap in dml_apos_fixture['boot_methods']: + assert np.allclose(dml_apos_fixture['boot_t_stat_' + bootstrap], + dml_apos_fixture['boot_t_stat_' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) From 3a415295758a6fb780128c5c81a8344fd6d6d615 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:25:27 +0200 Subject: [PATCH 40/98] add generate summary to utils --- doubleml/irm/qte.py | 13 +++++-------- doubleml/irm/tests/test_qte.py | 1 + doubleml/utils/_descriptive.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) create mode 100644 doubleml/utils/_descriptive.py diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index bbfc7a411..2a212d77d 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -15,6 +15,8 @@ from ..utils.resampling import DoubleMLResampling from ..utils._checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_sample_splitting +from ..utils._descriptive import generate_summary + class DoubleMLQTE: """Double machine learning for quantile treatment effects @@ -355,18 +357,13 @@ def summary(self): """ A summary for the estimated causal effect after calling :meth:`fit`. """ - col_names = ['coef', 'std err', 't', 'P>|t|'] if self.framework is None: + col_names = ['coef', 'std err', 't', 'P>|t|'] df_summary = pd.DataFrame(columns=col_names) else: - summary_stats = np.transpose(np.vstack( - [self.coef, self.se, - self.t_stat, self.pval])) - df_summary = pd.DataFrame(summary_stats, - columns=col_names, - index=self.quantiles) ci = self.confint() - df_summary = df_summary.join(ci) + df_summary = generate_summary(self.coef, self.se, self.t_stat, + self.pval, ci, self.quantiles) return df_summary def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): diff --git a/doubleml/irm/tests/test_qte.py b/doubleml/irm/tests/test_qte.py index 636a59fac..7c7b8c1df 100644 --- a/doubleml/irm/tests/test_qte.py +++ b/doubleml/irm/tests/test_qte.py @@ -181,6 +181,7 @@ def test_doubleml_qte_exceptions(): _ = dml_obj.smpls +@pytest.mark.ci def test_doubleml_qte_return_types(dml_qte_fixture): assert isinstance(dml_qte_fixture['qte_model'].__str__(), str) assert isinstance(dml_qte_fixture['qte_model'].summary, pd.DataFrame) diff --git a/doubleml/utils/_descriptive.py b/doubleml/utils/_descriptive.py new file mode 100644 index 000000000..79924a17e --- /dev/null +++ b/doubleml/utils/_descriptive.py @@ -0,0 +1,13 @@ +import numpy as np +import pandas as pd + + +def generate_summary(coef, se, t_stat, pval, ci, index_names): + col_names = ['coef', 'std err', 't', 'P>|t|'] + summary_stats = np.transpose(np.vstack( + [coef, se, t_stat, pval])) + df_summary = pd.DataFrame(summary_stats, + columns=col_names, + index=index_names) + df_summary = df_summary.join(ci) + return df_summary From 3bde8fd68cc62b3047923b4361fff099a0e2a6c5 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:32:57 +0200 Subject: [PATCH 41/98] add summary and properties to apos.py --- doubleml/irm/apos.py | 38 +++++++++++++++++++++++++++++++++ doubleml/irm/tests/test_apos.py | 32 
++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index c4dbd787a..bf86644b6 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -10,6 +10,7 @@ from ..double_ml_framework import concat from ..utils.resampling import DoubleMLResampling +from ..utils._descriptive import generate_summary from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_sample_splitting @@ -74,6 +75,14 @@ def __init__(self, # initialize all models if splits are known self._modellist = self._initialize_models() + def __str__(self): + class_name = self.__class__.__name__ + header = f'================== {class_name} Object ==================\n' + fit_summary = str(self.summary) + res = header + \ + '\n------------------ Fit summary ------------------\n' + fit_summary + return res + @property def score(self): """ @@ -198,6 +207,21 @@ def all_se(self): all_se = self.framework.all_ses return all_se + @property + def t_stat(self): + """ + t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)). + """ + t_stat = self.coef / self.se + return t_stat + + @property + def pval(self): + """ + p-values for the causal parameter(s) (shape (``n_quantiles``,)). + """ + return self.framework.pvals + @property def smpls(self): """ @@ -235,6 +259,20 @@ def modellist(self): """ return self._modellist + @property + def summary(self): + """ + A summary for the estimated causal effect after calling :meth:`fit`. + """ + if self.framework is None: + col_names = ['coef', 'std err', 't', 'P>|t|'] + df_summary = pd.DataFrame(columns=col_names) + else: + ci = self.confint() + df_summary = generate_summary(self.coef, self.se, self.t_stat, + self.pval, ci, self._treatment_levels) + return df_summary + def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): """ Estimate DoubleMLAPOS models. 
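A hedged sketch of the new reporting helpers (reusing the illustrative
``dml_obj`` from the sketch under PATCH 39):

    # __str__ renders a header followed by the fit summary table
    print(dml_obj)
    # summary is a DataFrame indexed by treatment level with columns
    # 'coef', 'std err', 't', 'P>|t|' plus the confint() bounds
    print(dml_obj.summary)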
diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 6c39a9678..23ba99c6c 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -80,6 +80,7 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ "trimming_threshold": trimming_threshold, } + unfitted_apos_model = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args) np.random.seed(42) dml_obj = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args) dml_obj.fit() @@ -117,10 +118,13 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ 'se_ext_smpls': dml_obj_ext_smpls.se, 'se_manual': res_manual['se'], 'boot_methods': boot_methods, + 'n_treatment_levels': len(treatment_levels), + 'n_rep': n_rep, 'ci': ci.to_numpy(), 'ci_ext_smpls': ci_ext_smpls.to_numpy(), 'ci_manual': ci_manual.to_numpy(), - 'apo_model': dml_obj + 'apos_model': dml_obj, + 'unfitted_apos_model': unfitted_apos_model } for bootstrap in boot_methods: @@ -169,3 +173,29 @@ def test_dml_apos_boot(dml_apos_fixture): assert np.allclose(dml_apos_fixture['boot_t_stat_' + bootstrap], dml_apos_fixture['boot_t_stat_' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_apos_ci(dml_apos_fixture): + for bootstrap in dml_apos_fixture['boot_methods']: + assert np.allclose(dml_apos_fixture['ci'], + dml_apos_fixture['ci_manual'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apos_fixture['ci'], + dml_apos_fixture['ci_ext_smpls'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apos_fixture['boot_ci_' + bootstrap], + dml_apos_fixture['boot_ci_' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_doubleml_apos_return_types(dml_apos_fixture): + assert isinstance(dml_apos_fixture['apos_model'].__str__(), str) + assert isinstance(dml_apos_fixture['apos_model'].summary, pd.DataFrame) + + assert dml_apos_fixture['apos_model'].all_coef.shape == ( + dml_apos_fixture['n_treatment_levels'], + dml_apos_fixture['n_rep'] + ) + assert isinstance(dml_apos_fixture['unfitted_apos_model'].summary, pd.DataFrame) From cb6ee7f96869499bbb990e6e255dfb4a6963421a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:56:21 +0200 Subject: [PATCH 42/98] Update test_apos.py --- doubleml/irm/tests/test_apos.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 23ba99c6c..ff2a378f5 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -48,15 +48,11 @@ def treatment_levels(request): @pytest.fixture(scope='module') -def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): boot_methods = ['normal'] n_folds = 2 n_rep_boot = 499 - # Set machine learning methods for m & g - ml_g = clone(learner[0]) - ml_m = clone(learner[1]) - np.random.seed(3141) n_obs = 500 data = make_irm_data_discrete_treatments(n_obs=n_obs) @@ -71,24 +67,27 @@ def dml_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_ dml_data = dml.DoubleMLData(df, 'y', 'd') input_args = { + 'obj_dml_data': dml_data, + 'ml_g': clone(learner[0]), + 'ml_m': clone(learner[1]), "treatment_levels": treatment_levels, "n_folds": n_folds, "n_rep": n_rep, "score": 'APO', "normalize_ipw": normalize_ipw, "trimming_rule": 'truncate', - 
"trimming_threshold": trimming_threshold, - } + "trimming_threshold": trimming_threshold, + } - unfitted_apos_model = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args) + unfitted_apos_model = dml.DoubleMLAPOS(**input_args) np.random.seed(42) - dml_obj = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args) + dml_obj = dml.DoubleMLAPOS(**input_args) dml_obj.fit() # get the sample splitting all_smpls = dml_obj.smpls np.random.seed(42) - dml_obj_ext_smpls = dml.DoubleMLAPOS(dml_data, ml_g, ml_m, **input_args, draw_sample_splitting=False) + dml_obj_ext_smpls = dml.DoubleMLAPOS(**input_args, draw_sample_splitting=False) dml_obj_ext_smpls.set_sample_splitting(dml_obj.smpls) dml_obj_ext_smpls.fit() From 72ad8595a8c3f695832e48ed9e325a8eae7658d4 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:56:38 +0200 Subject: [PATCH 43/98] Update test_apos.py --- doubleml/irm/tests/test_apos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index ff2a378f5..0d1dc9da1 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -76,7 +76,7 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen "score": 'APO', "normalize_ipw": normalize_ipw, "trimming_rule": 'truncate', - "trimming_threshold": trimming_threshold, + "trimming_threshold": trimming_threshold, } unfitted_apos_model = dml.DoubleMLAPOS(**input_args) From c82ca7ea726f758897bdce8829ac42d63fb886a2 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 15:00:40 +0200 Subject: [PATCH 44/98] Create test_apos_weighted_scores.py --- .../irm/tests/test_apos_weighted_scores.py | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 doubleml/irm/tests/test_apos_weighted_scores.py diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py new file mode 100644 index 000000000..84e6ac1c3 --- /dev/null +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -0,0 +1,97 @@ +import pytest +import numpy as np +import pandas as pd + +from sklearn.base import clone +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +import doubleml as dml +from doubleml.datasets import make_irm_data_discrete_treatments + + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=['APO']) +def score(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module', + params=[[0, 1, 2], [0]]) +def treatment_levels(request): + return request.param + + +@pytest.fixture(scope='module') +def weighted_apos_score_fixture(learner, score, normalize_ipw, trimming_threshold, + treatment_levels): + n_obs = 500 + n_folds = 2 + + # collect data + data = make_irm_data_discrete_treatments(n_obs=n_obs) + y = data['y'] + x = data['x'] + d = 
data['d'] + df = pd.DataFrame( + np.column_stack((y, d, x)), + columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])] + ) + + obj_dml_data = dml.DoubleMLData(df, 'y', 'd') + + input_args = { + 'obj_dml_data': obj_dml_data, + 'ml_g': clone(learner[0]), + 'ml_m': clone(learner[1]), + 'treatment_levels': treatment_levels, + 'n_folds': n_folds, + 'score': score, + 'normalize_ipw': normalize_ipw, + 'trimming_threshold': trimming_threshold, + 'trimming_rule': 'truncate' + } + + np.random.seed(3141) + dml_obj = dml.DoubleMLAPOS(**input_args) + dml_obj.fit() + + weights = 0.5 * np.ones_like(obj_dml_data.y) + dml_obj_weighted = dml.DoubleMLAPOS(draw_sample_splitting=False, + weights=weights, + **input_args) + dml_obj_weighted.set_sample_splitting(all_smpls=dml_obj.smpls) + dml_obj_weighted.fit() + + result_dict = { + 'coef': dml_obj.coef, + 'weighted_coef': dml_obj_weighted.coef, + } + return result_dict + + +@pytest.mark.ci +def test_apos_weighted_coef(weighted_apos_score_fixture): + assert np.allclose(0.5 * weighted_apos_score_fixture['coef'], + weighted_apos_score_fixture['weighted_coef']) From 5d07fe67ac0b3e9642113bb1e2966660661ae53a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 15:04:46 +0200 Subject: [PATCH 45/98] Create test_apos_classfier.py --- doubleml/irm/tests/test_apos_classfier.py | 200 ++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 doubleml/irm/tests/test_apos_classfier.py diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py new file mode 100644 index 000000000..9c3e7d351 --- /dev/null +++ b/doubleml/irm/tests/test_apos_classfier.py @@ -0,0 +1,200 @@ +import numpy as np +import pandas as pd +import pytest + +from sklearn.base import clone + +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestClassifier + +import doubleml as dml +from doubleml.datasets import make_irm_data_discrete_treatments + +from ._utils_apos_manual import fit_apos, boot_apos +from ...tests._utils import confint_manual + + +@pytest.fixture(scope='module', + params=[[LogisticRegression(solver='lbfgs', max_iter=250), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42)]]) +def learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=[1]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module', + params=[[0, 1, 2], [0]]) +def treatment_levels(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): + boot_methods = ['normal'] + n_folds = 2 + n_rep_boot = 499 + + np.random.seed(3141) + n_obs = 500 + data = make_irm_data_discrete_treatments(n_obs=n_obs) + y = np.random.binomial(1, 0.5, n_obs) + x = data['x'] + d = data['d'] + df = pd.DataFrame( + np.column_stack((y, d, x)), + columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])] + ) + + dml_data = dml.DoubleMLData(df, 'y', 'd') + + input_args = { + 'obj_dml_data': dml_data, + 'ml_g': clone(learner[0]), + 'ml_m': 
clone(learner[1]), + "treatment_levels": treatment_levels, + "n_folds": n_folds, + "n_rep": n_rep, + "score": 'APO', + "normalize_ipw": normalize_ipw, + "trimming_rule": 'truncate', + "trimming_threshold": trimming_threshold, + } + + unfitted_apos_model = dml.DoubleMLAPOS(**input_args) + np.random.seed(42) + dml_obj = dml.DoubleMLAPOS(**input_args) + dml_obj.fit() + # get the sample splitting + all_smpls = dml_obj.smpls + + np.random.seed(42) + dml_obj_ext_smpls = dml.DoubleMLAPOS(**input_args, draw_sample_splitting=False) + dml_obj_ext_smpls.set_sample_splitting(dml_obj.smpls) + dml_obj_ext_smpls.fit() + + np.random.seed(42) + res_manual = fit_apos( + y, x, d, + clone(learner[0]), clone(learner[1]), + treatment_levels=treatment_levels, + all_smpls=all_smpls, + score='APO', + trimming_rule='truncate', + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold) + + ci = dml_obj.confint(joint=False, level=0.95) + ci_ext_smpls = dml_obj_ext_smpls.confint(joint=False, level=0.95) + ci_manual = confint_manual( + res_manual['apos'], res_manual['se'], treatment_levels, + boot_t_stat=None, joint=False, level=0.95 + ) + + res_dict = { + 'coef': dml_obj.coef, + 'coef_ext_smpls': dml_obj_ext_smpls.coef, + 'coef_manual': res_manual['apos'], + 'se': dml_obj.se, + 'se_ext_smpls': dml_obj_ext_smpls.se, + 'se_manual': res_manual['se'], + 'boot_methods': boot_methods, + 'n_treatment_levels': len(treatment_levels), + 'n_rep': n_rep, + 'ci': ci.to_numpy(), + 'ci_ext_smpls': ci_ext_smpls.to_numpy(), + 'ci_manual': ci_manual.to_numpy(), + 'apos_model': dml_obj, + 'unfitted_apos_model': unfitted_apos_model + } + + for bootstrap in boot_methods: + np.random.seed(42) + boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels, + all_smpls, n_rep, bootstrap, n_rep_boot) + + np.random.seed(42) + dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + + res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat + + ci = dml_obj.confint(joint=True, level=0.95) + ci_manual = confint_manual(res_manual['apos'], res_manual['se'], treatment_levels, + boot_t_stat=boot_t_stat, joint=True, level=0.95) + res_dict['boot_ci_' + bootstrap] = ci.to_numpy() + res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() + + return res_dict + + +@pytest.mark.ci +def test_dml_apos_coef(dml_apos_classifier_fixture): + assert np.allclose(dml_apos_classifier_fixture['coef'], + dml_apos_classifier_fixture['coef_manual'], + rtol=1e-9, atol=1e-9) + assert np.allclose(dml_apos_classifier_fixture['coef'], + dml_apos_classifier_fixture['coef_ext_smpls'], + rtol=1e-9, atol=1e-9) + + +@pytest.mark.ci +def test_dml_apos_se(dml_apos_classifier_fixture): + assert np.allclose(dml_apos_classifier_fixture['se'], + dml_apos_classifier_fixture['se_manual'], + rtol=1e-9, atol=1e-9) + assert np.allclose(dml_apos_classifier_fixture['se'], + dml_apos_classifier_fixture['se_ext_smpls'], + rtol=1e-9, atol=1e-9) + + +@pytest.mark.ci +def test_dml_apos_boot(dml_apos_classifier_fixture): + for bootstrap in dml_apos_classifier_fixture['boot_methods']: + assert np.allclose(dml_apos_classifier_fixture['boot_t_stat_' + bootstrap], + dml_apos_classifier_fixture['boot_t_stat_' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_apos_ci(dml_apos_classifier_fixture): + for bootstrap in dml_apos_classifier_fixture['boot_methods']: + assert np.allclose(dml_apos_classifier_fixture['ci'], + 
dml_apos_classifier_fixture['ci_manual'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apos_classifier_fixture['ci'], + dml_apos_classifier_fixture['ci_ext_smpls'], + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_apos_classifier_fixture['boot_ci_' + bootstrap], + dml_apos_classifier_fixture['boot_ci_' + bootstrap + '_manual'], + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_doubleml_apos_return_types(dml_apos_classifier_fixture): + assert isinstance(dml_apos_classifier_fixture['apos_model'].__str__(), str) + assert isinstance(dml_apos_classifier_fixture['apos_model'].summary, pd.DataFrame) + + assert dml_apos_classifier_fixture['apos_model'].all_coef.shape == ( + dml_apos_classifier_fixture['n_treatment_levels'], + dml_apos_classifier_fixture['n_rep'] + ) + assert isinstance(dml_apos_classifier_fixture['unfitted_apos_model'].summary, pd.DataFrame) From 402a30b45644d6eb101b7867b8aab5e067ecbf8a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:03:40 +0200 Subject: [PATCH 46/98] add treatment_levels property --- doubleml/irm/apos.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index bf86644b6..2d000db22 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -97,6 +97,13 @@ def n_treatment_levels(self): """ return self._n_treatment_levels + @property + def treatment_levels(self): + """ + The evaluated treatment levels. + """ + return self._treatment_levels + @property def normalize_ipw(self): """ @@ -308,7 +315,7 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ if external_predictions is not None: raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") - # parallel estimation of the quantiles + # parallel estimation of the models parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') fitted_models = parallel(delayed(self._fit_model)(i_level, n_jobs_cv, store_predictions, store_models) for i_level in range(self.n_treatment_levels)) From 25e710bd22ccb86e76eadd8c0f8ffdf29d5363bd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 22 Jul 2024 21:17:47 +0200 Subject: [PATCH 47/98] add simple average treatment effects --- doubleml/irm/apos.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 2d000db22..d889d967a 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -459,6 +459,33 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): return self + def average_treatment_effect(self, baseline_level=None): + """ + Average treatment effects for DoubleMLAPOS models. + + Parameters + ---------- + baseline_level : None or int + The baseline level for the average treatment effect. + Default is ``None``. + + Returns + ------- + ate : pd.Series + A data frame with the average treatment effect(s). 
+ """ + + if self.framework is None: + raise ValueError('Apply fit() before average_treatment_effect().') + + i_baseline_level = self.treatment_levels.tolist().index(baseline_level) + baseline_framework = self.modellist[i_baseline_level].framework + + ate_frameworks = [model.framework - baseline_framework for i, model in + enumerate(self.modellist) if i != i_baseline_level] + ate = concat(ate_frameworks) + return ate + def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): model = self.modellist[i_level] From 660fdce19bd40aaba8619d007fb310ac9d97f7fe Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Tue, 23 Jul 2024 08:48:55 +0200 Subject: [PATCH 48/98] add optional treatment_names to framework --- doubleml/double_ml_framework.py | 41 +++++++++++++++++++-- doubleml/tests/test_framework_exceptions.py | 18 +++++++++ doubleml/utils/_descriptive.py | 8 ++-- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index c528fef84..8b603f5d0 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -9,6 +9,7 @@ from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \ _check_float, _check_integer, _check_bool, _check_benchmarks +from .utils._descriptive import generate_summary from .utils._plots import _sensitivity_contour_plot @@ -17,10 +18,10 @@ class DoubleMLFramework(): Parameters ---------- - doubleml_dict : :dict + doubleml_dict : :dict A dictionary providing the estimated parameters and normalized scores. Keys have to be 'thetas', 'ses', - 'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'. - Values have to be numpy arrays with the corresponding shapes. + 'all_thetas', 'all_ses', 'var_scaling_factors' and 'scaled_psi'. + Values have to be numpy arrays with the corresponding shapes. """ @@ -57,6 +58,12 @@ def __init__( # check if all sizes match self._check_framework_shapes() + + self._treatment_names = None + if 'treatment_names' in doubleml_dict.keys(): + self._check_treatment_names(doubleml_dict['treatment_names']) + self._treatment_names = doubleml_dict['treatment_names'] + # initialize bootstrap distribution self._boot_t_stat = None self._boot_method = None @@ -196,6 +203,16 @@ def sensitivity_params(self): """ return self._sensitivity_params + @property + def summary(self): + """ + A summary for the estimated causal effect ``theta``. + """ + ci = self.confint() + df_summary = generate_summary(self.thetas, self.ses, self.t_stats, + self.pvals, ci, self._treatment_names) + return df_summary + def __add__(self, other): if isinstance(other, DoubleMLFramework): @@ -612,8 +629,11 @@ def confint(self, joint=False, level=0.95): self.all_thetas + self.all_ses * critical_values), axis=1) ci = np.median(self._all_cis, axis=2) - # TODO: add treatment names df_ci = pd.DataFrame(ci, columns=['{:.1f} %'.format(i * 100) for i in percentages]) + + if self._treatment_names is not None: + df_ci.set_index(pd.Index(self._treatment_names), inplace=True) + return df_ci def bootstrap(self, method='normal', n_rep_boot=500): @@ -944,6 +964,19 @@ def _check_framework_shapes(self): return None + def _check_treatment_names(self, treatment_names): + if not isinstance(treatment_names, list): + raise TypeError('treatment_names must be a list. 
' + f'Got {str(treatment_names)} of type {str(type(treatment_names))}.') + is_str = [isinstance(name, str) for name in treatment_names] + if not all(is_str): + raise TypeError('treatment_names must be a list of strings. ' + f'At least one element is not a string: {str(treatment_names)}.') + if len(treatment_names) != self._n_thetas: + raise ValueError('The length of treatment_names does not match the number of treatments. ' + f'Got {self._n_thetas} treatments and {len(treatment_names)} treatment names.') + return None + def concat(objs): """ diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 7dc8849b2..45cf14cd5 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -142,6 +142,24 @@ def test_input_exceptions(): test_dict['cluster_dict'] = {'cluster_ids': np.ones(shape=(n_obs, n_rep))} DoubleMLFramework(test_dict) + msg = "treatment_names must be a list. Got 1 of type ." + with pytest.raises(TypeError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['treatment_names'] = 1 + DoubleMLFramework(test_dict) + + msg = r"treatment_names must be a list of strings. At least one element is not a string: \['test', 1\]." + with pytest.raises(TypeError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['treatment_names'] = ['test', 1] + DoubleMLFramework(test_dict) + + msg = "The length of treatment_names does not match the number of treatments. Got 2 treatments and 3 treatment names." + with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['treatment_names'] = ['test', 'test2', 'test3'] + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition diff --git a/doubleml/utils/_descriptive.py b/doubleml/utils/_descriptive.py index 79924a17e..54144bc8c 100644 --- a/doubleml/utils/_descriptive.py +++ b/doubleml/utils/_descriptive.py @@ -2,12 +2,12 @@ import pandas as pd -def generate_summary(coef, se, t_stat, pval, ci, index_names): +def generate_summary(coef, se, t_stat, pval, ci, index_names=None): col_names = ['coef', 'std err', 't', 'P>|t|'] summary_stats = np.transpose(np.vstack( [coef, se, t_stat, pval])) - df_summary = pd.DataFrame(summary_stats, - columns=col_names, - index=index_names) + df_summary = pd.DataFrame(summary_stats, columns=col_names) + if index_names is not None: + df_summary.index = index_names df_summary = df_summary.join(ci) return df_summary From 02388d6fd1988096ae3904b7ef88f8bd8bbc29ce Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Tue, 23 Jul 2024 08:52:44 +0200 Subject: [PATCH 49/98] fix dimensions in docstrings --- doubleml/irm/apos.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index d889d967a..1fba6d7a1 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -217,15 +217,14 @@ def all_se(self): @property def t_stat(self): """ - t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_quantiles``,)). + t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_treatment_levels``,)). """ - t_stat = self.coef / self.se - return t_stat + return self.framework.t_stats @property def pval(self): """ - p-values for the causal parameter(s) (shape (``n_quantiles``,)). + p-values for the causal parameter(s) (shape (``n_treatment_levels``,)). 
""" return self.framework.pvals @@ -251,7 +250,7 @@ def framework(self): def boot_t_stat(self): """ Bootstrapped t-statistics for the causal parameter(s) after calling :meth:`fit` and :meth:`bootstrap` - (shape (``n_rep_boot``, ``n_quantiles``, ``n_rep``)). + (shape (``n_rep_boot``, ``n_treatment_levels``, ``n_rep``)). """ if self._framework is None: boot_t_stat = None From a17644f58b426b04bcca0858f427a9ef0227fce9 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Tue, 23 Jul 2024 09:00:16 +0200 Subject: [PATCH 50/98] add setter for treatment_names in framework --- doubleml/double_ml_framework.py | 14 +++++++++++++- doubleml/tests/test_framework_exceptions.py | 9 +++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 8b603f5d0..633d6464e 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -203,10 +203,22 @@ def sensitivity_params(self): """ return self._sensitivity_params + @property + def treatment_names(self): + """ + Names of the treatments. + """ + return self._treatment_names + + @treatment_names.setter + def treatment_names(self, value): + self._check_treatment_names(value) + self._treatment_names = value + @property def summary(self): """ - A summary for the estimated causal effect ``theta``. + A summary for the estimated causal parameters ``thetas``. """ ci = self.confint() df_summary = generate_summary(self.thetas, self.ses, self.t_stats, diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 45cf14cd5..b80cfac27 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -142,23 +142,32 @@ def test_input_exceptions(): test_dict['cluster_dict'] = {'cluster_ids': np.ones(shape=(n_obs, n_rep))} DoubleMLFramework(test_dict) + test_dict = copy.deepcopy(doubleml_dict) + framework_names = DoubleMLFramework(test_dict) + msg = "treatment_names must be a list. Got 1 of type ." with pytest.raises(TypeError, match=msg): test_dict = copy.deepcopy(doubleml_dict) test_dict['treatment_names'] = 1 DoubleMLFramework(test_dict) + with pytest.raises(TypeError, match=msg): + framework_names.treatment_names = 1 msg = r"treatment_names must be a list of strings. At least one element is not a string: \['test', 1\]." with pytest.raises(TypeError, match=msg): test_dict = copy.deepcopy(doubleml_dict) test_dict['treatment_names'] = ['test', 1] DoubleMLFramework(test_dict) + with pytest.raises(TypeError, match=msg): + framework_names.treatment_names = ['test', 1] msg = "The length of treatment_names does not match the number of treatments. Got 2 treatments and 3 treatment names." 
 with pytest.raises(ValueError, match=msg):
 test_dict = copy.deepcopy(doubleml_dict)
 test_dict['treatment_names'] = ['test', 'test2', 'test3']
 DoubleMLFramework(test_dict)
+ with pytest.raises(ValueError, match=msg):
+ framework_names.treatment_names = ['test', 'test2', 'test3']


 def test_operation_exceptions():
From 2cea2cedb672cd38c0eb5ddb65bf48eb4f914db5 Mon Sep 17 00:00:00 2001
From: Sven1704
Date: Tue, 23 Jul 2024 09:24:29 +0200
Subject: [PATCH 51/98] rename to causal_contrast

---
 doubleml/irm/apos.py | 32 ++++++++++++----------
 doubleml/irm/tests/test_apos_exceptions.py | 8 ++++++
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 1fba6d7a1..8c6ad8e04 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -458,32 +458,36 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):

 return self

- def average_treatment_effect(self, baseline_level=None):
+ def causal_contrast(self, reference_level):
 """
- Average treatment effects for DoubleMLAPOS models.
+ Average causal contrasts for DoubleMLAPOS models. Estimates the difference in
+ average potential outcomes between the treatment levels and the reference level.
+ The reference has to be one of the treatment levels.

 Parameters
 ----------
- baseline_level : None or int
- The baseline level for the average treatment effect.
- Default is ``None``.
+ reference_level :
+ The reference level for the difference in average potential outcomes.
+ Has to be an element of ``treatment_levels``.

 Returns
 -------
- ate : pd.Series
- A data frame with the average treatment effect(s).
+ acc : DoubleMLFramework
+ A DoubleMLFramework class for average causal contrast(s).
 """

 if self.framework is None:
- raise ValueError('Apply fit() before average_treatment_effect().')
+ raise ValueError('Apply fit() before causal_contrast().')

- i_baseline_level = self.treatment_levels.tolist().index(baseline_level)
- baseline_framework = self.modellist[i_baseline_level].framework
+ i_reference_level = self.treatment_levels.tolist().index(reference_level)
+ reference_framework = self.modellist[i_reference_level].framework

- ate_frameworks = [model.framework - baseline_framework for i, model in
- enumerate(self.modellist) if i != i_baseline_level]
- ate = concat(ate_frameworks)
- return ate
+ acc_frameworks = [model.framework - reference_framework for i, model in
+ enumerate(self.modellist) if i != i_reference_level]
+ acc = concat(acc_frameworks)
+ acc.treatment_names = [f"{self.treatment_levels[i]} vs {reference_level}" for i in
+ range(self.n_treatment_levels) if i != i_reference_level]
+ return acc

 def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False):
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 9081a4e4a..ab123d815 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -70,3 +70,11 @@ def test_apos_exception_ipw_normalization():
 msg = "Normalization indicator has to be boolean. Object of type passed."
 with pytest.raises(TypeError, match=msg):
 _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, normalize_ipw=1)
+
+
+@pytest.mark.ci
+def test_causal_contrast_exceptions():
+ msg = r"Apply fit() before causal_contrast()."
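Numerically, the frameworks concatenated by causal_contrast reduce to elementwise differences of the APO estimates. A tiny sketch with made-up coefficients:

import numpy as np

apo_coefs = np.array([0.5, 1.3, 2.1])  # hypothetical APOs for levels 0, 1, 2
contrasts_vs_0 = apo_coefs[1:] - apo_coefs[0]
print(contrasts_vs_0)  # [0.8 1.6], i.e. APO(1) - APO(0) and APO(2) - APO(0)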
+ with pytest.raises(ValueError, match=msg): + dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0) + dml_obj.causal_contrast() From 739641e57a21d8d43cf945c8ace80c4bbb74a27d Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Tue, 23 Jul 2024 10:20:26 +0200 Subject: [PATCH 52/98] update treatment_levels to allow for iterable objects --- doubleml/irm/apos.py | 44 +++++++++++++++------- doubleml/irm/tests/test_apos_exceptions.py | 20 ++++++++-- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 8c6ad8e04..8efb437c7 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +from collections.abc import Iterable from sklearn.base import clone @@ -35,8 +36,7 @@ def __init__(self, self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData) self._check_data(self._dml_data) - self._treatment_levels = np.asarray(treatment_levels).reshape((-1, )) - self._check_treatment_levels() + self._treatment_levels = self._check_treatment_levels(treatment_levels) self._n_treatment_levels = len(self._treatment_levels) self._normalize_ipw = normalize_ipw @@ -458,16 +458,17 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): return self - def causal_contrast(self, reference_level): + def causal_contrast(self, reference_levels): """ Average causal contrasts for DoubleMLAPOS models. Estimates the difference in - average potential outcomes between the treatment levels and the reference level. - The reference has to be one of the treatment levels. + average potential outcomes between the treatment levels and the reference levels. + The reference levels have to be a subset of the treatment levels or a single + treatment level. Parameters ---------- - reference_level : - The reference level for the difference in average potential outcomes. + reference_levels : + The reference levels for the difference in average potential outcomes. Has to be an element of ``treatment_levels``. Returns @@ -478,14 +479,23 @@ def causal_contrast(self, reference_level): if self.framework is None: raise ValueError('Apply fit() before causal_contrast().') - - i_reference_level = self.treatment_levels.tolist().index(reference_level) + is_iterable = isinstance(reference_levels, Iterable) + if not is_iterable: + reference_levels = [reference_levels] + is_treatment_level_subset = set(reference_levels).issubset(set(self.treatment_levels)) + if not is_treatment_level_subset: + raise ValueError('Invalid reference_levels. 
reference_levels has to be an iterable subset of treatment_levels or ' + 'a single treatment level.') + + for ref_lvl in reference_levels: + i_ref_lvl = self.treatment_levels.to + i_ref_lvls = self.treatment_levels.tolist().index(reference_levels) reference_framework = self.modellist[i_reference_level].framework acc_frameworks = [model.framework - reference_framework for i, model in enumerate(self.modellist) if i != i_reference_level] acc = concat(acc_frameworks) - acc.treatment_names = [f"{self.treatment_levels[i]} vs {reference_level}" for i in + acc.treatment_names = [f"{self.treatment_levels[i]} vs {reference_levels}" for i in range(self.n_treatment_levels) if i != i_reference_level] return acc @@ -495,9 +505,17 @@ def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_mode model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models) return model - def _check_treatment_levels(self): - if not np.all(np.isin(self._treatment_levels, np.unique(self._dml_data.d))): - raise ValueError('The treatment levels have to be a subset of the unique treatment levels in the data.') + def _check_treatment_levels(self, treatment_levels): + is_iterable = isinstance(treatment_levels, Iterable) + if not is_iterable: + treatment_level_list = [treatment_levels] + else: + treatment_level_list = [t_lvl for t_lvl in treatment_levels] + is_d_subset = set(treatment_level_list).issubset(set(np.unique(self._dml_data.d))) + if not is_d_subset: + raise ValueError('Invalid reference_levels. reference_levels has to be an iterable subset or ' + 'a single element of the unique treatment levels in the data.') + return treatment_level_list def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index ab123d815..a895f0151 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -31,7 +31,8 @@ def test_apos_exception_data(): dml_data_z = make_iivm_data() _ = DoubleMLAPOS(dml_data_z, ml_g, ml_m, treatment_levels=0) - msg = 'The treatment levels have to be a subset of the unique treatment levels in the data.' + msg = ('Invalid reference_levels. reference_levels has to be an iterable subset or ' + 'a single element of the unique treatment levels in the data.') with pytest.raises(ValueError, match=msg): _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[1.1]) with pytest.raises(ValueError, match=msg): @@ -74,7 +75,18 @@ def test_apos_exception_ipw_normalization(): @pytest.mark.ci def test_causal_contrast_exceptions(): - msg = r"Apply fit() before causal_contrast()." + msg = r"Apply fit\(\) before causal_contrast\(\)." with pytest.raises(ValueError, match=msg): - dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0) - dml_obj.causal_contrast() + dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1]) + dml_obj.causal_contrast(reference_levels=0) + + dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1]) + dml_obj.fit() + msg = ('Invalid reference_levels. 
reference_levels has to be an iterable subset of treatment_levels or ' + 'a single treatment level.') + with pytest.raises(ValueError, match=msg): + dml_obj.causal_contrast(reference_levels=2) + with pytest.raises(ValueError, match=msg): + dml_obj.causal_contrast(reference_levels=[2]) + with pytest.raises(ValueError, match=msg): + dml_obj.causal_contrast(reference_levels=[0, 2]) From 310d532195e7d4d083313eeb7ab665b38d3eab8b Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Tue, 23 Jul 2024 16:52:57 +0200 Subject: [PATCH 53/98] add causal_contrasts to apos with unit tests --- doubleml/irm/apos.py | 25 +++++++++++++-------- doubleml/irm/tests/test_apos.py | 26 ++++++++++++++++++++++ doubleml/irm/tests/test_apos_exceptions.py | 6 +++++ 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 8efb437c7..88cbb803d 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -479,6 +479,8 @@ def causal_contrast(self, reference_levels): if self.framework is None: raise ValueError('Apply fit() before causal_contrast().') + if self.n_treatment_levels == 1: + raise ValueError('Only one treatment level. No causal contrast can be computed.') is_iterable = isinstance(reference_levels, Iterable) if not is_iterable: reference_levels = [reference_levels] @@ -487,16 +489,21 @@ def causal_contrast(self, reference_levels): raise ValueError('Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or ' 'a single treatment level.') + skip_index = [] + all_treatment_names = [] + all_acc_frameworks = [] for ref_lvl in reference_levels: - i_ref_lvl = self.treatment_levels.to - i_ref_lvls = self.treatment_levels.tolist().index(reference_levels) - reference_framework = self.modellist[i_reference_level].framework - - acc_frameworks = [model.framework - reference_framework for i, model in - enumerate(self.modellist) if i != i_reference_level] - acc = concat(acc_frameworks) - acc.treatment_names = [f"{self.treatment_levels[i]} vs {reference_levels}" for i in - range(self.n_treatment_levels) if i != i_reference_level] + i_ref_lvl = self.treatment_levels.index(ref_lvl) + ref_framework = self.modellist[i_ref_lvl].framework + + skip_index += [i_ref_lvl] + all_acc_frameworks += [model.framework - ref_framework for i, model in + enumerate(self.modellist) if i not in skip_index] + all_treatment_names += [f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}" for + i in range(self.n_treatment_levels) if i not in skip_index] + + acc = concat(all_acc_frameworks) + acc.treatment_names = all_treatment_names return acc def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 0d1dc9da1..4ba478a41 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -143,6 +143,13 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen res_dict['boot_ci_' + bootstrap] = ci.to_numpy() res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() + # causal contrasts + if len(treatment_levels) > 1: + acc_single = dml_obj.causal_contrast(reference_levels=[treatment_levels[0]]) + res_dict['causal_contrast_single'] = acc_single + acc_multiple = dml_obj.causal_contrast(reference_levels=treatment_levels) + res_dict['causal_contrast_multiple'] = acc_multiple + return res_dict @@ -198,3 +205,22 @@ def test_doubleml_apos_return_types(dml_apos_fixture): 
dml_apos_fixture['n_rep'] ) assert isinstance(dml_apos_fixture['unfitted_apos_model'].summary, pd.DataFrame) + if dml_apos_fixture['n_treatment_levels'] > 1: + assert isinstance(dml_apos_fixture['causal_contrast_single'], dml.DoubleMLFramework) + assert isinstance(dml_apos_fixture['causal_contrast_multiple'], dml.DoubleMLFramework) + + +@pytest.mark.ci +def test_doubleml_apos_causal_contrast(dml_apos_fixture): + if dml_apos_fixture['n_treatment_levels'] == 1: + pytest.skip("Skipping test as n_treatment_levels is 1") + + acc_single = dml_apos_fixture['coef'][1:] - dml_apos_fixture['coef'][0] + assert np.allclose(dml_apos_fixture['causal_contrast_single'].thetas, + acc_single, + rtol=1e-9, atol=1e-9) + + acc_multiple = np.append(acc_single, dml_apos_fixture['coef'][2] - dml_apos_fixture['coef'][1]) + assert np.allclose(dml_apos_fixture['causal_contrast_multiple'].thetas, + acc_multiple, + rtol=1e-9, atol=1e-9) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index a895f0151..32dc04f21 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -80,6 +80,12 @@ def test_causal_contrast_exceptions(): dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1]) dml_obj.causal_contrast(reference_levels=0) + msg = 'Only one treatment level. No causal contrast can be computed.' + with pytest.raises(ValueError, match=msg): + dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0]) + dml_obj.fit() + dml_obj.causal_contrast(reference_levels=0) + dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1]) dml_obj.fit() msg = ('Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or ' From 3a73ac993e12e22e1a8b4efd051a2ed823324885 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:53:19 +0200 Subject: [PATCH 54/98] update scaling for apos dgp --- doubleml/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index b3f6d745d..e2f2dcddf 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1536,7 +1536,7 @@ def f_treatment(w, xi): res = xi * (-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3]) return res - def treatment_effect(d, scale=5): + def treatment_effect(d, scale=15): return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 z_tilde_1 = np.exp(0.5 * x[:, 0]) From 879cfb0cfcec6be687ea700403222748da4c0cf9 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 08:21:59 +0200 Subject: [PATCH 55/98] reduce irm settings for unit tests --- doubleml/irm/tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doubleml/irm/tests/conftest.py b/doubleml/irm/tests/conftest.py index 3f57b4220..6fe207b06 100644 --- a/doubleml/irm/tests/conftest.py +++ b/doubleml/irm/tests/conftest.py @@ -14,8 +14,7 @@ def _g(x): @pytest.fixture(scope='session', params=[(500, 10), - (1000, 20), - (1000, 100)]) + (1000, 20)]) def generate_data_irm(request): n_p = request.param np.random.seed(1111) From aaa581ea069f0bbbc325081ad6c555870d6f6c33 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 08:25:10 +0200 Subject: [PATCH 56/98] extend weight tests for apo --- .../irm/tests/test_apo_weighted_scores.py | 70 +++++++++++++------ 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py 
b/doubleml/irm/tests/test_apo_weighted_scores.py index 94d81170c..17fea8a0a 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -24,6 +24,12 @@ def score(request): return request.param +@pytest.fixture(scope='module', + params=[1, 3]) +def n_rep(request): + return request.param + + @pytest.fixture(scope='module', params=[False, True]) def normalize_ipw(request): @@ -43,7 +49,7 @@ def treatment_level(request): @pytest.fixture(scope='module') -def weighted_apo_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold, +def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_level): n_folds = 2 @@ -53,38 +59,44 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, normalize_ipw, all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) - # Set machine learning methods for m & g - ml_g = clone(learner[0]) - ml_m = clone(learner[1]) - - np.random.seed(3141) - dml_obj = dml.DoubleMLAPO(obj_dml_data, - ml_g, ml_m, - treatment_level, - n_folds, - score=score, - normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, - draw_sample_splitting=False) + input_args = { + "obj_dml_data": obj_dml_data, + "ml_g": clone(learner[0]), + "ml_m": clone(learner[1]), + "treatment_level": treatment_level, + "n_folds": n_folds, + "n_rep": n_rep, + "score": score, + "normalize_ipw": normalize_ipw, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": False, + } + + np.random.seed(42) + dml_obj = dml.DoubleMLAPO(**input_args) dml_obj.set_sample_splitting(all_smpls=all_smpls) dml_obj.fit() + np.random.seed(42) weights = 0.5 * np.ones_like(obj_dml_data.y) - dml_obj_weighted = dml.DoubleMLAPO(obj_dml_data, - ml_g, ml_m, - treatment_level, - n_folds, - score=score, - weights=weights, - normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, - draw_sample_splitting=False) + dml_obj_weighted = dml.DoubleMLAPO(weights=weights, **input_args) dml_obj_weighted.set_sample_splitting(all_smpls=all_smpls) dml_obj_weighted.fit() + np.random.seed(42) + weights_dict = { + 'weights': weights, + 'weights_bar': np.tile(weights[:, np.newaxis], (1, n_rep)), + } + dml_obj_weighted_dict = dml.DoubleMLAPO(weights=weights_dict, **input_args) + dml_obj_weighted_dict.set_sample_splitting(all_smpls=all_smpls) + dml_obj_weighted_dict.fit() + result_dict = { 'coef': dml_obj.coef, 'weighted_coef': dml_obj_weighted.coef, + 'weighted_coef_dict': dml_obj_weighted_dict.coef, + 'default_weights': dml_obj.weights, } return result_dict @@ -93,3 +105,15 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, normalize_ipw, def test_apo_weighted_coef(weighted_apo_score_fixture): assert np.allclose(0.5 * weighted_apo_score_fixture['coef'], weighted_apo_score_fixture['weighted_coef']) + assert np.allclose(0.5 * weighted_apo_score_fixture['coef'], + weighted_apo_score_fixture['weighted_coef_dict']) + + +def test_apo_default_weights(weighted_apo_score_fixture): + assert isinstance(weighted_apo_score_fixture['default_weights'], dict) + + expected_keys = {'weights'} + assert set(weighted_apo_score_fixture['default_weights'].keys()) == expected_keys + + assert np.allclose(weighted_apo_score_fixture['default_weights']['weights'], + np.ones_like(weighted_apo_score_fixture['default_weights']['weights'])) \ No newline at end of file From 9cbc9b3bcef784000feea76396936043480ae849 Mon Sep 17 
00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 08:28:32 +0200 Subject: [PATCH 57/98] add pytest mark.ci to weight test --- doubleml/irm/tests/test_apo_weighted_scores.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py index 17fea8a0a..062bfb94e 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -109,6 +109,7 @@ def test_apo_weighted_coef(weighted_apo_score_fixture): weighted_apo_score_fixture['weighted_coef_dict']) +@pytest.mark.ci def test_apo_default_weights(weighted_apo_score_fixture): assert isinstance(weighted_apo_score_fixture['default_weights'], dict) From e076f972395351616e8f2b5d20f859e31a451c8b Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 08:58:25 +0200 Subject: [PATCH 58/98] extend weight tests for apos --- doubleml/irm/tests/test_apo_weighted_scores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py index 062bfb94e..5551e5dd0 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -117,4 +117,4 @@ def test_apo_default_weights(weighted_apo_score_fixture): assert set(weighted_apo_score_fixture['default_weights'].keys()) == expected_keys assert np.allclose(weighted_apo_score_fixture['default_weights']['weights'], - np.ones_like(weighted_apo_score_fixture['default_weights']['weights'])) \ No newline at end of file + np.ones_like(weighted_apo_score_fixture['default_weights']['weights'])) From 64de6007f12d0620b802754f1e49d480ffbf66bc Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 09:02:39 +0200 Subject: [PATCH 59/98] extend apos weights test --- .../irm/tests/test_apos_weighted_scores.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py index 84e6ac1c3..3ab8db6af 100644 --- a/doubleml/irm/tests/test_apos_weighted_scores.py +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -25,6 +25,12 @@ def score(request): return request.param +@pytest.fixture(scope='module', + params=[1, 3]) +def n_rep(request): + return request.param + + @pytest.fixture(scope='module', params=[False, True]) def normalize_ipw(request): @@ -44,7 +50,7 @@ def treatment_levels(request): @pytest.fixture(scope='module') -def weighted_apos_score_fixture(learner, score, normalize_ipw, trimming_threshold, +def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_levels): n_obs = 500 n_folds = 2 @@ -67,16 +73,18 @@ def weighted_apos_score_fixture(learner, score, normalize_ipw, trimming_threshol 'ml_m': clone(learner[1]), 'treatment_levels': treatment_levels, 'n_folds': n_folds, + 'n_rep': n_rep, 'score': score, 'normalize_ipw': normalize_ipw, 'trimming_threshold': trimming_threshold, 'trimming_rule': 'truncate' } - np.random.seed(3141) + np.random.seed(42) dml_obj = dml.DoubleMLAPOS(**input_args) dml_obj.fit() + np.random.seed(42) weights = 0.5 * np.ones_like(obj_dml_data.y) dml_obj_weighted = dml.DoubleMLAPOS(draw_sample_splitting=False, weights=weights, @@ -84,9 +92,22 @@ def weighted_apos_score_fixture(learner, score, normalize_ipw, trimming_threshol dml_obj_weighted.set_sample_splitting(all_smpls=dml_obj.smpls) dml_obj_weighted.fit() + np.random.seed(42) + weights_dict = 
{ + 'weights': weights, + 'weights_bar': np.tile(weights[:, np.newaxis], (1, n_rep)), + } + dml_obj_weighted_dict = dml.DoubleMLAPOS(draw_sample_splitting=False, + weights=weights_dict, + **input_args) + dml_obj_weighted_dict.set_sample_splitting(all_smpls=dml_obj.smpls) + dml_obj_weighted_dict.fit() + result_dict = { 'coef': dml_obj.coef, 'weighted_coef': dml_obj_weighted.coef, + 'weighted_coef_dict': dml_obj_weighted_dict.coef, + 'default_weights': dml_obj.weights, } return result_dict @@ -95,3 +116,13 @@ def weighted_apos_score_fixture(learner, score, normalize_ipw, trimming_threshol def test_apos_weighted_coef(weighted_apos_score_fixture): assert np.allclose(0.5 * weighted_apos_score_fixture['coef'], weighted_apos_score_fixture['weighted_coef']) + assert np.allclose(0.5 * weighted_apos_score_fixture['coef'], + weighted_apos_score_fixture['weighted_coef_dict']) + + +@pytest.mark.ci +def test_apos_default_weights(weighted_apos_score_fixture): + assert isinstance(weighted_apos_score_fixture['default_weights'], np.ndarray) + + assert np.allclose(weighted_apos_score_fixture['default_weights'], + np.ones_like(weighted_apos_score_fixture['default_weights'])) From c4f6a05ffa11e1927afedb539531403003d4f464 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 10:07:10 +0200 Subject: [PATCH 60/98] remove apply_cross_fitting from apo_manual --- doubleml/irm/tests/_utils_apo_manual.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 862a2793d..e22f80ffe 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -127,21 +127,18 @@ def var_apo(theta, g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score, n_obs) def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_smpls, score, bootstrap, n_rep_boot, - n_rep=1, apply_cross_fitting=True, normalize_ipw=True): + n_rep=1, normalize_ipw=True): treated = (d == treatment_level) all_boot_t_stat = list() for i_rep in range(n_rep): smpls = all_smpls[i_rep] - if apply_cross_fitting: - n_obs = len(y) - else: - test_index = smpls[0][1] - n_obs = len(test_index) + n_obs = len(y) + weights = draw_weights(bootstrap, n_rep_boot, n_obs) boot_t_stat = boot_apo_single_split( thetas[i_rep], y, d, treated, all_g_hat0[i_rep], all_g_hat1[i_rep], all_m_hat[i_rep], smpls, - score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting, normalize_ipw) + score, ses[i_rep], weights, n_rep_boot, normalize_ipw) all_boot_t_stat.append(boot_t_stat) boot_t_stat = np.hstack(all_boot_t_stat) @@ -150,7 +147,7 @@ def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_h def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_list, - smpls, score, se, weights, n_rep_boot, apply_cross_fitting, normalize_ipw): + smpls, score, se, weights, n_rep_boot, normalize_ipw): _, u_hat1, _, g_hat1, m_hat = compute_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, smpls) @@ -161,7 +158,7 @@ def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_ J = -1.0 psi = g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat_adj) - theta - boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) + boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot) return boot_t_stat From 705afb8f61dd24e8a61edb058a025a4cd5129da0 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 10:14:00 +0200 
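The 0.5 * coef == weighted_coef assertions in these weighted-score tests hold because the APO score is linear in the weights, so constant weights simply rescale the solved parameter. A self-contained sketch with simulated nuisance quantities (all names and values here are illustrative, not library API):

import numpy as np

rng = np.random.default_rng(0)
n = 1000
m_hat = rng.uniform(0.2, 0.8, n)   # simulated propensity estimates
treated = rng.binomial(1, m_hat)   # simulated treatment indicator
g_hat = rng.normal(size=n)         # simulated outcome predictions
y = g_hat + rng.normal(size=n)

# elementwise APO score contributions; theta solves the sample moment equation
psi_b = g_hat + treated * (y - g_hat) / m_hat
theta = psi_b.mean()

# constant weights enter the score multiplicatively, so the estimate scales with them
omega = 0.5
assert np.isclose((omega * psi_b).mean(), omega * theta)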
Subject: [PATCH 61/98] add test for classifier without binary outcome in apo

---
 doubleml/irm/tests/test_apo_exceptions.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
index 2e9a7a1bd..ce6d4fd79 100644
--- a/doubleml/irm/tests/test_apo_exceptions.py
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -47,6 +47,15 @@ def test_apo_exception_data():
 _ = DoubleMLAPO(dml_data_warn, ml_g, ml_m, treatment_level=42)


+@pytest.mark.ci
+def test_apo_exception_learner():
+ msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not'
+ ' binary with values 0 and 1.')
+ with pytest.raises(ValueError, match=msg):
+ ml_g_classifier = LogisticRegression()
+ _ = DoubleMLAPO(dml_data, ml_g_classifier, ml_m, treatment_level=0)
+
+
 @pytest.mark.ci
 def test_apo_exception_scores():
 msg = 'Invalid score MAR. Valid score APO.'
From 8b318154cd970c8172ab8e415a52b6eb9a280649 Mon Sep 17 00:00:00 2001
From: Sven1704
Date: Thu, 25 Jul 2024 12:04:57 +0200
Subject: [PATCH 62/98] Add exception for classifier in DoubleMLAPOS class

---
 doubleml/irm/apos.py | 12 +++++++++++-
 doubleml/irm/tests/test_apos_exceptions.py | 9 +++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 88cbb803d..95c909dfa 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -6,6 +6,7 @@

 from joblib import Parallel, delayed

+from ..double_ml import DoubleML
 from ..double_ml_data import DoubleMLData, DoubleMLClusterData
 from .apo import DoubleMLAPO
 from ..double_ml_framework import concat
@@ -60,8 +61,17 @@ def __init__(self,
 raise TypeError('Normalization indicator has to be boolean. ' +
 f'Object of type {str(type(self.normalize_ipw))} passed.')

+ ml_g_is_classifier = DoubleML._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
+ _ = DoubleML._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
 self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)}
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+ if ml_g_is_classifier:
+ if obj_dml_data.binary_outcome:
+ self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+ else:
+ raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
+ 'but the outcome variable is not binary with values 0 and 1.')
+ else:
+ self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}

 # APO weights
 _check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep)
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 32dc04f21..e722832b4 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -41,6 +41,15 @@ def test_apos_exception_data():
 _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[1, 2.2])


+@pytest.mark.ci
+def test_apos_exception_learner():
+ msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not'
+ ' binary with values 0 and 1.')
+ with pytest.raises(ValueError, match=msg):
+ ml_g_classifier = LogisticRegression()
+ _ = DoubleMLAPOS(dml_data, ml_g_classifier, ml_m, treatment_level=0)
+
+
 @pytest.mark.ci
 def test_apos_exception_scores():
 msg = 'Invalid score MAR. Valid score APO.'
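Taken together, the pieces above support the following end-to-end use of the new classes. A minimal sketch mirroring the test fixtures (learners, seed and fold count chosen only for illustration):

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression

import doubleml as dml
from doubleml.datasets import make_irm_data_discrete_treatments

np.random.seed(42)
data = make_irm_data_discrete_treatments(n_obs=500)
df = pd.DataFrame(
    np.column_stack((data['y'], data['d'], data['x'])),
    columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
)
dml_data = dml.DoubleMLData(df, 'y', 'd')

dml_obj = dml.DoubleMLAPOS(
    dml_data,
    ml_g=LinearRegression(),
    ml_m=LogisticRegression(solver='lbfgs', max_iter=250),
    treatment_levels=[0, 1, 2],
    n_folds=2,
)
dml_obj.fit()
print(dml_obj.summary)

# contrasts of each level against reference level 0, returned as a DoubleMLFramework
contrast = dml_obj.causal_contrast(reference_levels=0)
print(contrast.summary)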
From 9d9b7fd709818da003f04ecd9f637feb9dcb3b0e Mon Sep 17 00:00:00 2001
From: Sven1704
Date: Thu, 25 Jul 2024 12:11:43 +0200
Subject: [PATCH 63/98] Update test_apos_exceptions.py

---
 doubleml/irm/tests/test_apos_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index e722832b4..058db5f72 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -47,7 +47,7 @@ def test_apos_exception_learner():
 ' binary with values 0 and 1.')
 with pytest.raises(ValueError, match=msg):
 ml_g_classifier = LogisticRegression()
- _ = DoubleMLAPOS(dml_data, ml_g_classifier, ml_m, treatment_level=0)
+ _ = DoubleMLAPOS(dml_data, ml_g_classifier, ml_m, treatment_levels=0)


 @pytest.mark.ci
From 0daf406ba639cd3d5cff440cb992d1a7a7716db7 Mon Sep 17 00:00:00 2001
From: Sven1704
Date: Thu, 25 Jul 2024 15:22:55 +0200
Subject: [PATCH 64/98] add separate function for binary outcome check

---
 doubleml/irm/irm.py | 18 +++---------------
 doubleml/tests/test_exceptions.py | 2 +-
 doubleml/utils/_checks.py | 10 ++++++++++
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 3cf98ec36..1b1695c66 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -13,7 +13,7 @@

 from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw, _cond_targets
 from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, \
- _check_weights
+ _check_weights, _check_binary_predictions


 class DoubleMLIRM(LinearScoreMixin, DoubleML):
@@ -275,13 +275,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
 g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0))

 if self._dml_data.binary_outcome:
- binary_preds = (type_of_target(g_hat0['preds']) == 'binary')
- zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0)
- if binary_preds & zero_one_preds:
- raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, '
- f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also '
- 'observed to be binary with values 0 and 1. Make sure that for classifiers '
- 'probabilities and not labels are predicted.')
+ _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)

 if g1_external:
 # use external predictions
@@ -297,13 +291,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
 g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1))

 if self._dml_data.binary_outcome & (self.score != 'ATTE'):
- binary_preds = (type_of_target(g_hat1['preds']) == 'binary')
- zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0)
- if binary_preds & zero_one_preds:
- raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, '
- f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also '
- 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col) # nuisance m if m_external: diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 215c0a088..8d0e74070 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -966,7 +966,7 @@ def test_doubleml_exception_learner(): with pytest.warns(UserWarning, match=msg): dml_irm_hidden_classifier = DoubleMLIRM(dml_data_irm_binary_outcome, log_reg, LogisticRegression()) - msg = (r'For the binary outcome variable y, predictions obtained with the ml_g learner ' + msg = (r'For the binary variable y, predictions obtained with the ml_g learner ' r'LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure ' 'that for classifiers probabilities and not labels are predicted.') with pytest.raises(ValueError, match=msg): diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py index e54c4041b..d7d2881ed 100644 --- a/doubleml/utils/_checks.py +++ b/doubleml/utils/_checks.py @@ -206,6 +206,16 @@ def _check_is_propensity(preds, learner, learner_name, smpls, eps=1e-12): return +def _check_binary_predictions(pred, learner, learner_name, variable_name): + binary_preds = (type_of_target(pred) == 'binary') + zero_one_preds = np.all((np.power(pred, 2) - pred) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary variable {variable_name}, ' + f'predictions obtained with the {learner_name} learner {str(learner)} are also ' + 'observed to be binary with values 0 and 1. Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + + def _check_benchmarks(benchmarks): if benchmarks is not None: if not isinstance(benchmarks, dict): From b9d8ff61811f0b751b4d9753e40206696b0e200d Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 15:25:58 +0200 Subject: [PATCH 65/98] update binary outcome check iivm --- doubleml/irm/iivm.py | 22 +++++----------------- doubleml/tests/test_exceptions.py | 2 +- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index c131449bb..c2f85dd4d 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -7,7 +7,8 @@ from ..double_ml_score_mixins import LinearScoreMixin from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw -from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity +from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, \ + _check_binary_predictions class DoubleMLIIVM(LinearScoreMixin, DoubleML): @@ -264,15 +265,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat0['targets'][z == 1] = np.nan if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat0['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') - + _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col) _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) + if external_predictions['ml_g1'] is not None: g_hat1 = {'preds': external_predictions['ml_g1'], 'targets': None, @@ -287,14 +282,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat1['targets'][z == 0] = np.nan if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat1['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') - + _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col) _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) # nuisance m diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 8d0e74070..1dc23dfb5 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -980,7 +980,7 @@ def test_doubleml_exception_learner(): with pytest.warns(UserWarning, match=msg): dml_iivm_hidden_classifier = DoubleMLIIVM(dml_data_iivm_binary_outcome, log_reg, LogisticRegression(), LogisticRegression()) - msg = (r'For the binary outcome variable y, predictions obtained with the ml_g learner ' + msg = (r'For the binary variable y, predictions obtained with the ml_g learner ' r'LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure ' 'that for classifiers probabilities and not labels are predicted.') with pytest.raises(ValueError, match=msg): From ffa77cf0bc5702d51fc0eaf0f62a6f17c1d0f929 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 15:28:34 +0200 Subject: [PATCH 66/98] update binary treatment check plr --- doubleml/plm/plr.py | 10 ++-------- doubleml/tests/test_exceptions.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index 3b7d90f5b..fd9d78ae9 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -12,7 +12,7 @@ from ..utils.blp import DoubleMLBLP from ..utils._estimation import _dml_cv_predict, _dml_tune -from ..utils._checks import _check_score, _check_finite_predictions, _check_is_propensity +from ..utils._checks import _check_score, _check_finite_predictions, _check_is_propensity, _check_binary_predictions class DoubleMLPLR(LinearScoreMixin, DoubleML): @@ -198,13 +198,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]: - binary_preds = (type_of_target(m_hat['preds']) == 'binary') - zero_one_preds = np.all((np.power(m_hat['preds'], 2) - m_hat['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary treatment variable {self._dml_data.d_cols[self._i_treat]}, ' - f'predictions obtained with the ml_m learner {str(self._learner["ml_m"])} are also ' - 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + _check_binary_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', self._dml_data.d_cols[self._i_treat]) # an estimate of g is obtained for the IV-type score and callable scores g_hat = {'preds': None, 'targets': None, 'models': None} diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 1dc23dfb5..3ba85e167 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -950,7 +950,7 @@ def test_doubleml_exception_learner(): 'nor a classifier. Method predict is used for prediction.') with pytest.warns(UserWarning, match=msg): dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg) - msg = (r'For the binary treatment variable d, predictions obtained with the ml_m learner LogisticRegression\(\) ' + msg = (r'For the binary variable d, predictions obtained with the ml_m learner LogisticRegression\(\) ' 'are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not ' 'labels are predicted.') with pytest.raises(ValueError, match=msg): From d66658c7cace6992a2080b58107fca0edb469534 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 15:28:48 +0200 Subject: [PATCH 67/98] Update plr.py --- doubleml/plm/plr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index fd9d78ae9..d5810b972 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd from sklearn.utils import check_X_y -from sklearn.utils.multiclass import type_of_target from sklearn.base import clone import warnings From 50d8b2dee53bcb7c157153a4801f3d73b869e044 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 25 Jul 2024 15:30:28 +0200 Subject: [PATCH 68/98] update binary outcome check apo --- doubleml/irm/apo.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 838e55460..13aae5c32 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -3,7 +3,6 @@ import warnings from sklearn.utils import check_X_y -from sklearn.utils.multiclass import type_of_target from ..double_ml import DoubleML @@ -14,7 +13,7 @@ from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _cond_targets, _trimm, \ _normalize_ipw from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_finite_predictions, \ - _check_is_propensity + _check_is_propensity, _check_binary_predictions class DoubleMLAPO(LinearScoreMixin, DoubleML): @@ -230,13 +229,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(treated == 0)) if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat0['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col) if g1_external: # use external predictions @@ -252,13 +245,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(treated == 1)) if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat1['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col) # nuisance m if m_external: From 0bfda41a4748ae81fd364c3ac0525c315b307da3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:33:12 +0200 Subject: [PATCH 69/98] adjust check data for APO --- doubleml/irm/apo.py | 4 ---- doubleml/tests/_utils.py | 11 +++++++++++ doubleml/tests/test_exceptions.py | 13 ++----------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 13aae5c32..f52750709 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -8,7 +8,6 @@ from ..utils.blp import DoubleMLBLP from ..double_ml_score_mixins import LinearScoreMixin -from ..double_ml_data import DoubleMLData from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _cond_targets, _trimm, \ _normalize_ipw @@ -368,9 +367,6 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res def _check_data(self, obj_dml_data): - if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') if obj_dml_data.z_cols is not None: raise ValueError('Incompatible data. 
' + ' and '.join(obj_dml_data.z_cols) + diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index fb85b2410..18ceef883 100644 --- a/doubleml/tests/_utils.py +++ b/doubleml/tests/_utils.py @@ -5,6 +5,17 @@ from scipy.stats import norm from ..utils._estimation import _var_est, _aggregate_coefs_and_ses +from ..double_ml_data import DoubleMLBaseData + + +class DummyDataClass(DoubleMLBaseData): + def __init__(self, + data): + DoubleMLBaseData.__init__(self, data) + + @property + def n_coefs(self): + return 1 def draw_smpls(n_obs, n_folds, n_rep=1, groups=None): diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 3ba85e167..a694d807f 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -8,7 +8,8 @@ DoubleMLDIDCS, DoubleMLBLP from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020 -from doubleml.double_ml_data import DoubleMLBaseData + +from ._utils import DummyDataClass from sklearn.linear_model import Lasso, LogisticRegression from sklearn.base import BaseEstimator @@ -38,16 +39,6 @@ dml_data_iivm_binary_outcome = DoubleMLData.from_arrays(x, y, d, z) -class DummyDataClass(DoubleMLBaseData): - def __init__(self, - data): - DoubleMLBaseData.__init__(self, data) - - @property - def n_coefs(self): - return 1 - - @pytest.mark.ci def test_doubleml_exception_data(): msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.' From 4189df586aa239290273e6e4f1ecb6b5377f65dc Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 25 Jul 2024 17:56:09 +0200 Subject: [PATCH 70/98] add apo and gapo exception tests --- doubleml/irm/tests/test_apo_exceptions.py | 51 ++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index ce6d4fd79..970ba1900 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -3,9 +3,10 @@ import numpy as np from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data +from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data, make_irm_data from sklearn.linear_model import Lasso, LogisticRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor n = 100 data_apo = make_irm_data_discrete_treatments(n_obs=n) @@ -142,3 +143,51 @@ def test_apo_exception_weights(): _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights={'weights': np.ones((dml_data.d.shape[0], )), 'weights_bar': np.zeros((dml_data.d.shape[0], 1))}) + + +@pytest.mark.ci +def test_apo_exception_capo_gapo(): + n = 20 + # collect data + np.random.seed(42) + obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + + # First stage estimation + ml_g = RandomForestRegressor(n_estimators=10) + ml_m = RandomForestClassifier(n_estimators=10) + + dml_obj = DoubleMLAPO(obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + treatment_level=0) + + dml_obj.fit() + # create a random basis + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) + + msg = "Invalid score APO_2. Valid score APO." + with pytest.raises(ValueError, match=msg): + dml_obj._score = 'APO_2' + _ = dml_obj.capo(random_basis) + # reset the score + dml_obj._score = 'APO' + + msg = "Only implemented for one repetition. Number of repetitions is 2." 
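+    # capo() is only implemented for a single repetition, so forcing n_rep = 2 has to raise a NotImplementedError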
+    with pytest.raises(NotImplementedError, match=msg):
+        dml_obj._n_rep = 2
+        dml_obj.capo(random_basis)
+    # reset the number of repetitions
+    dml_obj._n_rep = 1
+
+    msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        _ = dml_obj.gapo(1)
+
+    groups_1 = pd.DataFrame(
+        np.column_stack([obj_dml_data.data['X1'] > 0.2, np.ones_like(obj_dml_data.data['X1'])]),
+        columns=['Group 1', 'Group 2']
+    )
+    msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). Alternatively,'
+           ' groups should only contain one column.')
+    with pytest.raises(TypeError, match=msg):
+        _ = dml_obj.gapo(groups_1)

From 73a164f32be7aadb56b623666463d26bb5c27ee1 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:08:35 +0200
Subject: [PATCH 71/98] add methods exception tests for apos

---
 doubleml/irm/tests/test_apos_exceptions.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 058db5f72..3e60bba83 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -82,6 +82,24 @@ def test_apos_exception_ipw_normalization():
         _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, normalize_ipw=1)
 
 
+@pytest.mark.ci
+def test_apos_exception_properties_and_methods():
+    # properties
+    dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, draw_sample_splitting=False)
+    msg = r'Sample splitting not specified. Draw samples via .draw_sample_splitting\(\). External samples not implemented yet.'
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.smpls
+
+    # methods
+    dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0)
+    msg = r'Apply fit\(\) before confint\(\).'
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.confint()
+    msg = r'Apply fit\(\) before bootstrap\(\).'
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.bootstrap()
+
+
 @pytest.mark.ci
 def test_causal_contrast_exceptions():
     msg = r"Apply fit\(\) before causal_contrast\(\)."

From 039901dcd978e65861574a307cb176af632ee4fe Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 25 Jul 2024 19:59:35 +0200
Subject: [PATCH 72/98] add property tests for DoubleMLAPOS

---
 doubleml/irm/apos.py            | 12 +++++++--
 doubleml/irm/tests/test_apos.py | 45 ++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 95c909dfa..c0fabe39b 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -229,14 +229,22 @@ def t_stat(self):
         """
         t-statistics for the causal parameter(s) after calling :meth:`fit` (shape (``n_treatment_levels``,)).
         """
-        return self.framework.t_stats
+        if self._framework is None:
+            t_stats = None
+        else:
+            t_stats = self.framework.t_stats
+        return t_stats
 
     @property
     def pval(self):
         """
         p-values for the causal parameter(s) (shape (``n_treatment_levels``,)).
""" - return self.framework.pvals + if self._framework is None: + pvals = None + else: + pvals = self.framework.pvals + return pvals @property def smpls(self): diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 4ba478a41..6f8da9b90 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -8,12 +8,55 @@ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.datasets import make_irm_data_discrete_treatments, make_irm_data from ._utils_apos_manual import fit_apos, boot_apos from ...tests._utils import confint_manual +@pytest.mark.ci +def test_apo_properties(): + n = 20 + # collect data + np.random.seed(42) + obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + + dml_obj = dml.DoubleMLAPOS(obj_dml_data, + ml_g=RandomForestRegressor(n_estimators=10), + ml_m=RandomForestClassifier(n_estimators=10), + treatment_levels=0) + + # check properties before fit + assert dml_obj.n_rep_boot is None + assert dml_obj.coef is None + assert dml_obj.all_coef is None + assert dml_obj.se is None + assert dml_obj.all_se is None + assert dml_obj.t_stat is None + assert dml_obj.pval is None + assert dml_obj.n_rep_boot is None + assert dml_obj.boot_t_stat is None + assert dml_obj.boot_method is None + + # check properties after fit + dml_obj.fit() + assert dml_obj.coef is not None + assert dml_obj.all_coef is not None + assert dml_obj.se is not None + assert dml_obj.all_se is not None + assert dml_obj.t_stat is not None + assert dml_obj.pval is not None + assert dml_obj.n_rep_boot is None + assert dml_obj.boot_t_stat is None + assert dml_obj.boot_method is None + + # check properties after bootstrap + dml_obj.bootstrap() + assert dml_obj.n_rep_boot is not None + assert dml_obj.boot_t_stat is not None + assert dml_obj.boot_method is not None + + @pytest.fixture(scope='module', params=[[LinearRegression(), LogisticRegression(solver='lbfgs', max_iter=250, random_state=42)], From 039901dcd978e65861574a307cb176af632ee4fe Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 26 Jul 2024 07:44:34 +0200 Subject: [PATCH 73/98] fix exception test apos --- doubleml/irm/tests/test_apos_exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index 3e60bba83..e9d972f13 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -88,7 +88,7 @@ def test_apos_exception_properties_and_methods(): dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, draw_sample_splitting=False) msg = r'Sample splitting not specified. Draw samples via .draw_sample splitting\(\). External samples not implemented yet.' 
with pytest.raises(ValueError, match=msg): - dml_obj.smpls + _ = dml_obj.smpls # methods dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0) From 953cbef49a7b71382e55c60daaf831f4029b5d66 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:48:58 +0200 Subject: [PATCH 74/98] add exception for framework with sensitivity analysis --- doubleml/tests/test_exceptions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index a694d807f..b8b690f34 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -260,6 +260,14 @@ def test_doubleml_exception_data(): Lasso(), LogisticRegression()) +@pytest.mark.ci +def test_doubleml_exception_framework(): + msg = r'Apply fit\(\) before sensitivity_analysis\(\).' + with pytest.raises(ValueError, match=msg): + dml_obj = DoubleMLPLR(dml_data, ml_l, ml_m) + dml_obj.sensitivity_analysis() + + @pytest.mark.ci def test_doubleml_exception_scores(): # PLR From a367b1e91f0c9ac4b3522482ed5634cb22e06898 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:17:17 +0200 Subject: [PATCH 75/98] update default test for doubleml --- doubleml/tests/test_model_defaults.py | 63 ++++++++++++++++----------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index 5da2fbcaf..4df50b062 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -28,38 +28,23 @@ dml_did_cs = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression()) dml_ssm = DoubleMLSSM(dml_data_ssm, Lasso(), LogisticRegression(), LogisticRegression()) -dml_plr.fit() -dml_pliv.fit() -dml_irm.fit() -dml_iivm.fit() -dml_cvar.fit() -dml_did.fit() -dml_did_cs.fit() -dml_ssm.fit() - -dml_plr.bootstrap() -dml_pliv.bootstrap() -dml_irm.bootstrap() -dml_iivm.bootstrap() -dml_cvar.bootstrap() -dml_did.bootstrap() -dml_did_cs.bootstrap() -dml_ssm.bootstrap() - # nonlinear models dml_pq = DoubleMLPQ(dml_data_irm, ml_g=LogisticRegression(), ml_m=LogisticRegression()) dml_lpq = DoubleMLLPQ(dml_data_iivm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) dml_qte = DoubleMLQTE(dml_data_irm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) -dml_pq.fit() -dml_lpq.fit() -dml_qte.fit() -dml_pq.bootstrap() -dml_lpq.bootstrap() -dml_qte.bootstrap() +def _assert_is_none(dml_obj): + assert dml_obj.n_rep_boot is None + assert dml_obj.boot_method is None + assert dml_obj.framework is None + assert dml_obj.sensitivity_params is None + assert dml_obj.boot_t_stat is None + -policy_tree = dml_irm.policy_tree(features=dml_data_irm.data.drop(columns=["y", "d"])) +def _fit_bootstrap(dml_obj): + dml_obj.fit() + dml_obj.bootstrap() def _assert_resampling_default_settings(dml_obj): @@ -84,12 +69,16 @@ def _assert_resampling_default_settings(dml_obj): @pytest.mark.ci def test_plr_defaults(): + _assert_is_none(dml_plr) + _fit_bootstrap(dml_plr) _assert_resampling_default_settings(dml_plr) assert dml_plr.score == 'partialling out' @pytest.mark.ci def test_pliv_defaults(): + _assert_is_none(dml_pliv) + _fit_bootstrap(dml_pliv) _assert_resampling_default_settings(dml_pliv) assert dml_pliv.score == 'partialling out' assert dml_pliv.partialX @@ -98,6 +87,8 @@ def test_pliv_defaults(): @pytest.mark.ci def test_irm_defaults(): + _assert_is_none(dml_irm) 
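+    # pre-fit state is checked via _assert_is_none(); _fit_bootstrap() then fits and bootstraps before the defaults are asserted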
+ _fit_bootstrap(dml_irm) _assert_resampling_default_settings(dml_irm) assert dml_irm.score == 'ATE' assert dml_irm.trimming_rule == 'truncate' @@ -109,6 +100,8 @@ def test_irm_defaults(): @pytest.mark.ci def test_iivm_defaults(): + _assert_is_none(dml_iivm) + _fit_bootstrap(dml_iivm) _assert_resampling_default_settings(dml_iivm) assert dml_iivm.score == 'LATE' assert dml_iivm.subgroups == {'always_takers': True, 'never_takers': True} @@ -119,6 +112,8 @@ def test_iivm_defaults(): @pytest.mark.ci def test_cvar_defaults(): + _assert_is_none(dml_cvar) + _fit_bootstrap(dml_cvar) _assert_resampling_default_settings(dml_cvar) assert dml_cvar.quantile == 0.5 assert dml_cvar.treatment == 1 @@ -129,6 +124,8 @@ def test_cvar_defaults(): @pytest.mark.ci def test_pq_defaults(): + _assert_is_none(dml_pq) + _fit_bootstrap(dml_pq) _assert_resampling_default_settings(dml_pq) assert dml_pq.quantile == 0.5 assert dml_pq.treatment == 1 @@ -140,6 +137,8 @@ def test_pq_defaults(): @pytest.mark.ci def test_lpq_defaults(): + _assert_is_none(dml_lpq) + _fit_bootstrap(dml_lpq) _assert_resampling_default_settings(dml_lpq) assert dml_lpq.quantile == 0.5 assert dml_lpq.treatment == 1 @@ -151,6 +150,11 @@ def test_lpq_defaults(): @pytest.mark.ci def test_qte_defaults(): + assert dml_qte.n_rep_boot is None + assert dml_qte.boot_method is None + assert dml_qte.framework is None + assert dml_qte.boot_t_stat is None + _fit_bootstrap(dml_qte) # not fix since its a differen object added in future versions _assert_resampling_default_settings(dml_qte) assert dml_qte.quantiles == 0.5 assert dml_qte.score == 'PQ' @@ -161,6 +165,8 @@ def test_qte_defaults(): @pytest.mark.ci def test_did_defaults(): + _assert_is_none(dml_did) + _fit_bootstrap(dml_did) _assert_resampling_default_settings(dml_did) assert dml_did.score == 'observational' assert dml_did.in_sample_normalization @@ -170,6 +176,8 @@ def test_did_defaults(): @pytest.mark.ci def test_did_cs_defaults(): + _assert_is_none(dml_did_cs) + _fit_bootstrap(dml_did_cs) _assert_resampling_default_settings(dml_did_cs) assert dml_did.score == 'observational' assert dml_did_cs.in_sample_normalization @@ -179,6 +187,8 @@ def test_did_cs_defaults(): @pytest.mark.ci def test_ssm_defaults(): + _assert_is_none(dml_ssm) + _fit_bootstrap(dml_ssm) _assert_resampling_default_settings(dml_ssm) assert dml_ssm.score == 'missing-at-random' assert dml_ssm.trimming_rule == 'truncate' @@ -200,6 +210,9 @@ def test_sensitivity_defaults(): @pytest.mark.ci def test_policytree_defaults(): + dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) + dml_irm.fit() + policy_tree = dml_irm.policy_tree(features=dml_data_irm.data.drop(columns=["y", "d"])) assert policy_tree.policy_tree.max_depth == 2 assert policy_tree.policy_tree.min_samples_leaf == 8 assert policy_tree.policy_tree.ccp_alpha == 0.01 From 87f6acc9d13c95f07b3f986d442545a94b3c2574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klaa=C3=9Fen?= <47529404+SvenKlaassen@users.noreply.github.com> Date: Sat, 27 Jul 2024 09:08:53 +0200 Subject: [PATCH 76/98] extend model default tests for apo and apos --- doubleml/tests/test_model_defaults.py | 56 ++++++++++++++++++++------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index 4df50b062..b28274e24 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -1,8 +1,7 @@ import pytest import numpy as np -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, 
DoubleMLPLIV, DoubleMLCVAR, DoubleMLPQ, \ - DoubleMLLPQ, DoubleMLQTE, DoubleMLDID, DoubleMLDIDCS, DoubleMLSSM +import doubleml as dml from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, make_did_SZ2020, \ make_ssm_data @@ -19,19 +18,21 @@ dml_data_ssm = make_ssm_data(n_obs=2000, mar=True) # linear models -dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso()) -dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso()) -dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) -dml_iivm = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression()) -dml_cvar = DoubleMLCVAR(dml_data_irm, ml_g=RandomForestRegressor(), ml_m=RandomForestClassifier()) -dml_did = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression()) -dml_did_cs = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression()) -dml_ssm = DoubleMLSSM(dml_data_ssm, Lasso(), LogisticRegression(), LogisticRegression()) +dml_plr = dml.DoubleMLPLR(dml_data_plr, Lasso(), Lasso()) +dml_pliv = dml.DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso()) +dml_irm = dml.DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) +dml_iivm = dml.DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression()) +dml_cvar = dml.DoubleMLCVAR(dml_data_irm, ml_g=RandomForestRegressor(), ml_m=RandomForestClassifier()) +dml_did = dml.DoubleMLDID(dml_data_did, Lasso(), LogisticRegression()) +dml_did_cs = dml.DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression()) +dml_ssm = dml.DoubleMLSSM(dml_data_ssm, Lasso(), LogisticRegression(), LogisticRegression()) +dml_apo = dml.DoubleMLAPO(dml_data_irm, Lasso(), LogisticRegression(), treatment_level=0) +dml_apos = dml.DoubleMLAPOS(dml_data_irm, Lasso(), LogisticRegression(), treatment_levels=[0, 1]) # nonlinear models -dml_pq = DoubleMLPQ(dml_data_irm, ml_g=LogisticRegression(), ml_m=LogisticRegression()) -dml_lpq = DoubleMLLPQ(dml_data_iivm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) -dml_qte = DoubleMLQTE(dml_data_irm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) +dml_pq = dml.DoubleMLPQ(dml_data_irm, ml_g=LogisticRegression(), ml_m=LogisticRegression()) +dml_lpq = dml.DoubleMLLPQ(dml_data_iivm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) +dml_qte = dml.DoubleMLQTE(dml_data_irm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier()) def _assert_is_none(dml_obj): @@ -196,6 +197,33 @@ def test_ssm_defaults(): assert not dml_ssm.normalize_ipw +@pytest.mark.ci +def test_apo_defaults(): + _assert_is_none(dml_apo) + _fit_bootstrap(dml_apo) + _assert_resampling_default_settings(dml_apo) + assert dml_apo.score == 'APO' + assert dml_apo.trimming_rule == 'truncate' + assert dml_apo.trimming_threshold == 1e-2 + assert not dml_apo.normalize_ipw + assert set(dml_apo.weights.keys()) == set(['weights']) + assert np.array_equal(dml_apo.weights['weights'], np.ones((dml_apo._dml_data.n_obs,))) + + +@pytest.mark.ci +def test_apos_defaults(): + assert dml_apos.n_rep_boot is None + assert dml_apo.boot_method is None + assert dml_apo.framework is None + assert dml_apo.boot_t_stat is None + _fit_bootstrap(dml_qte) + assert dml_apos.score == 'APO' + assert dml_apos.trimming_rule == 'truncate' + assert dml_apos.trimming_threshold == 1e-2 + assert not dml_apos.normalize_ipw + assert np.array_equal(dml_apos.weights, np.ones((dml_apos._dml_data.n_obs,))) + + @pytest.mark.ci def test_sensitivity_defaults(): input_dict = {'cf_y': 0.03, @@ -210,7 +238,7 @@ def 
test_sensitivity_defaults(): @pytest.mark.ci def test_policytree_defaults(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) + dml_irm = dml.DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) dml_irm.fit() policy_tree = dml_irm.policy_tree(features=dml_data_irm.data.drop(columns=["y", "d"])) assert policy_tree.policy_tree.max_depth == 2 From 1628001f4ee4b0fbda3ce517c2724b2499d26bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klaa=C3=9Fen?= <47529404+SvenKlaassen@users.noreply.github.com> Date: Sun, 28 Jul 2024 12:50:37 +0200 Subject: [PATCH 77/98] fix model default test --- doubleml/tests/test_model_defaults.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index b28274e24..8d7234d62 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -213,9 +213,9 @@ def test_apo_defaults(): @pytest.mark.ci def test_apos_defaults(): assert dml_apos.n_rep_boot is None - assert dml_apo.boot_method is None - assert dml_apo.framework is None - assert dml_apo.boot_t_stat is None + assert dml_apos.boot_method is None + assert dml_apos.framework is None + assert dml_apos.boot_t_stat is None _fit_bootstrap(dml_qte) assert dml_apos.score == 'APO' assert dml_apos.trimming_rule == 'truncate' From f24d13a46e2720e87216883b0485baf40e5988b4 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 06:49:19 +0200 Subject: [PATCH 78/98] add sensitivity_elements property to apos --- doubleml/irm/apos.py | 13 +++++++++++++ doubleml/irm/tests/test_apos.py | 2 ++ 2 files changed, 15 insertions(+) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index c0fabe39b..9104ef564 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -283,6 +283,19 @@ def modellist(self): """ return self._modellist + @property + def sensitivity_elements(self): + """ + Values of the sensitivity components after calling :meth:`fit`; + If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2``, ``psi_nu2`` + and ``riesz_rep``. 
+ """ + if self._framework is None: + sensitivity_elements = None + else: + sensitivity_elements = self._framework.sensitivity_elements + return sensitivity_elements + @property def summary(self): """ diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 6f8da9b90..ce1fb48f5 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -37,6 +37,7 @@ def test_apo_properties(): assert dml_obj.n_rep_boot is None assert dml_obj.boot_t_stat is None assert dml_obj.boot_method is None + assert dml_obj.sensitivity_elements is None # check properties after fit dml_obj.fit() @@ -49,6 +50,7 @@ def test_apo_properties(): assert dml_obj.n_rep_boot is None assert dml_obj.boot_t_stat is None assert dml_obj.boot_method is None + assert dml_obj.sensitivity_elements is not None # check properties after bootstrap dml_obj.bootstrap() From ac38ba77d7216006113195b86ad3c93942e4e518 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 06:54:36 +0200 Subject: [PATCH 79/98] add sensitivity_params and sensitivity_analysis to apos --- doubleml/irm/apos.py | 62 ++++++++++++++++++++++ doubleml/irm/tests/test_apos.py | 6 +++ doubleml/irm/tests/test_apos_exceptions.py | 3 ++ 3 files changed, 71 insertions(+) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 9104ef564..d8283c1fb 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -296,6 +296,19 @@ def sensitivity_elements(self): sensitivity_elements = self._framework.sensitivity_elements return sensitivity_elements + @property + def sensitivity_params(self): + """ + Values of the sensitivity parameters after calling :meth:`sesitivity_analysis`; + If available (e.g., PLR, IRM) a dictionary with entries ``theta``, ``se``, ``ci``, ``rv`` + and ``rva``. + """ + if self._framework is None: + sensitivity_params = None + else: + sensitivity_params = self._framework.sensitivity_params + return sensitivity_params + @property def summary(self): """ @@ -413,6 +426,55 @@ def bootstrap(self, method='normal', n_rep_boot=500): return self + def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_hypothesis=0.0): + """ + Performs a sensitivity analysis to account for unobserved confounders. + + The evaluated scenario is stored as a dictionary in the property ``sensitivity_params``. + + Parameters + ---------- + cf_y : float + Percentage of the residual variation of the outcome explained by latent/confounding variables. + Default is ``0.03``. + + cf_d : float + Percentage gains in the variation of the Riesz representer generated by latent/confounding variables. + Default is ``0.03``. + + rho : float + The correlation between the differences in short and long representations in the main regression and + Riesz representer. Has to be in [-1,1]. The absolute value determines the adversarial strength of the + confounding (maximizes at 1.0). + Default is ``1.0``. + + level : float + The confidence level. + Default is ``0.95``. + + null_hypothesis : float or numpy.ndarray + Null hypothesis for the effect. Determines the robustness values. + If it is a single float uses the same null hypothesis for all estimated parameters. + Else the array has to be of shape (n_coefs,). + Default is ``0.0``. 
+
+        Returns
+        -------
+        self : object
+        """
+
+        if self._framework is None:
+            raise ValueError('Apply fit() before sensitivity_analysis().')
+        self._framework.sensitivity_analysis(
+            cf_y=cf_y,
+            cf_d=cf_d,
+            rho=rho,
+            level=level,
+            null_hypothesis=null_hypothesis
+        )
+
+        return self
+
     def draw_sample_splitting(self):
         """
         Draw sample splitting for DoubleML models.
diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py
index ce1fb48f5..9ebc7591e 100644
--- a/doubleml/irm/tests/test_apos.py
+++ b/doubleml/irm/tests/test_apos.py
@@ -38,6 +38,7 @@ def test_apo_properties():
     assert dml_obj.boot_t_stat is None
     assert dml_obj.boot_method is None
     assert dml_obj.sensitivity_elements is None
+    assert dml_obj.sensitivity_params is None
 
     # check properties after fit
     dml_obj.fit()
@@ -51,6 +52,7 @@ def test_apo_properties():
     assert dml_obj.boot_t_stat is None
     assert dml_obj.boot_method is None
     assert dml_obj.sensitivity_elements is not None
+    assert dml_obj.sensitivity_params is None
 
     # check properties after bootstrap
     dml_obj.bootstrap()
@@ -58,6 +60,10 @@ def test_apo_properties():
     assert dml_obj.boot_t_stat is not None
     assert dml_obj.boot_method is not None
 
+    # check properties after sensitivity analysis
+    dml_obj.sensitivity_analysis()
+    assert dml_obj.sensitivity_params is not None
+
 
 @pytest.fixture(scope='module',
                 params=[[LinearRegression(),
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index e9d972f13..c6bee072c 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -98,6 +98,9 @@ def test_apos_exception_properties_and_methods():
     msg = r'Apply fit\(\) before bootstrap\(\).'
     with pytest.raises(ValueError, match=msg):
         dml_obj.bootstrap()
+    msg = r'Apply fit\(\) before sensitivity_analysis\(\).'
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.sensitivity_analysis()

From 8f323a59001f8281f7c0105345128388ab1f8b77 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Wed, 31 Jul 2024 07:05:22 +0200
Subject: [PATCH 80/98] add sensitivity_plot to apos

---
 doubleml/irm/apos.py                       | 71 ++++++++++++++++++++++
 doubleml/irm/tests/test_apos_exceptions.py |  3 +
 2 files changed, 74 insertions(+)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index d8283c1fb..e04f55924 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -475,6 +475,77 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
 
         return self
 
+    def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0,
+                         include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100):
+        """
+        Contour plot of the sensitivity with respect to latent/confounding variables.
+
+        Parameters
+        ----------
+        idx_treatment : int
+            Index of the treatment to perform the sensitivity analysis.
+            Default is ``0``.
+
+        value : str
+            Determines which contours to plot. Valid values are ``'theta'`` (refers to the bounds)
+            and ``'ci'`` (refers to the bounds including statistical uncertainty).
+            Default is ``'theta'``.
+
+        rho : float
+            The correlation between the differences in short and long representations in the main regression and
+            Riesz representer. Has to be in [-1,1]. The absolute value determines the adversarial strength of the
+            confounding (maximizes at 1.0).
+            Default is ``1.0``.
+
+        level : float
+            The confidence level.
+            Default is ``0.95``.
+ + null_hypothesis : float + Null hypothesis for the effect. Determines the direction of the contour lines. + + include_scenario : bool + Indicates whether to highlight the scenario from the call of :meth:`sensitivity_analysis`. + Default is ``True``. + + benchmarks : dict or None + Dictionary of benchmarks to be included in the plot. The keys are ``cf_y``, ``cf_d`` and ``name``. + Default is ``None``. + + fill : bool + Indicates whether to use a heatmap style or only contour lines. + Default is ``True``. + + grid_bounds : tuple + Determines the evaluation bounds of the grid for ``cf_d`` and ``cf_y``. Has to contain two floats in [0, 1). + Default is ``(0.15, 0.15)``. + + grid_size : int + Determines the number of evaluation points of the grid. + Default is ``100``. + + Returns + ------- + fig : object + Plotly figure of the sensitivity contours. + """ + if self._framework is None: + raise ValueError('Apply fit() before sensitivity_plot().') + fig = self._framework.sensitivity_plot( + idx_treatment=idx_treatment, + value=value, + rho=rho, + level=level, + null_hypothesis=null_hypothesis, + include_scenario=include_scenario, + benchmarks=benchmarks, + fill=fill, + grid_bounds=grid_bounds, + grid_size=grid_size + ) + + return fig + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index c6bee072c..9d17447b7 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -101,6 +101,9 @@ def test_apos_exception_properties_and_methods(): msg = r'Apply fit\(\) before sensitivity_analysis\(\).' with pytest.raises(ValueError, match=msg): dml_obj.sensitivity_analysis() + msg = r'Apply fit\(\) before sensitivity_plot\(\).' 
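+    # sensitivity_plot() also delegates to the framework, so it is only available after fit()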
+ with pytest.raises(ValueError, match=msg): + dml_obj.sensitivity_plot() @pytest.mark.ci From 706004c7ac42e810469c6d89ba8a2e19d24d6e3f Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 07:30:11 +0200 Subject: [PATCH 81/98] Update test_return_types.py --- doubleml/tests/test_return_types.py | 141 +++++++++++----------------- 1 file changed, 56 insertions(+), 85 deletions(-) diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index d76f2d147..79b5fc933 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -18,6 +18,7 @@ DoubleMLPolicyTree, DoubleMLFramework, DoubleMLSSM, + DoubleMLAPO ) from doubleml.datasets import ( make_plr_CCDDHNR2018, @@ -61,6 +62,7 @@ dml_did_cs = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression()) dml_did_cs_binary_outcome = DoubleMLDIDCS(dml_data_did_cs_binary_outcome, LogisticRegression(), LogisticRegression()) dml_ssm = DoubleMLSSM(dml_data_ssm, ml_g=Lasso(), ml_m=LogisticRegression(), ml_pi=LogisticRegression()) +dml_apo = DoubleMLAPO(dml_data_irm, Lasso(), LogisticRegression(), treatment_level=0) @pytest.mark.ci @@ -77,7 +79,8 @@ (dml_did_binary_outcome, DoubleMLDID), (dml_did_cs, DoubleMLDIDCS), (dml_did_cs_binary_outcome, DoubleMLDIDCS), - (dml_ssm, DoubleMLSSM)]) + (dml_ssm, DoubleMLSSM), + (dml_apo, DoubleMLAPO)]) def test_return_types(dml_obj, cls): # ToDo: A second test case with multiple treatment variables would be helpful assert isinstance(dml_obj.__str__(), str) @@ -168,11 +171,16 @@ def test_return_types(dml_obj, cls): ssm_obj.fit() ssm_obj.bootstrap(n_rep_boot=n_rep_boot) +apo_obj = DoubleMLAPO(dml_data_irm, Lasso(), LogisticRegression(), treatment_level=0, + n_rep=n_rep, n_folds=n_folds) +apo_obj.fit() +apo_obj.bootstrap(n_rep_boot=n_rep_boot) + @pytest.mark.ci @pytest.mark.parametrize('dml_obj', [plr_obj, pliv_obj, irm_obj, iivm_obj, cvar_obj, pq_obj, lpq_obj, - did_obj, did_cs_obj]) + did_obj, did_cs_obj, ssm_obj, apo_obj]) def test_property_types_and_shapes(dml_obj): # not checked: learner, learner_names, params, params_names, score # already checked: summary @@ -300,6 +308,10 @@ def test_stored_predictions(): assert ssm_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat) assert ssm_obj.predictions['ml_pi'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.predictions['ml_g0'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.predictions['ml_g1'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat) + @pytest.mark.ci def test_stored_nuisance_targets(): @@ -347,6 +359,10 @@ def test_stored_nuisance_targets(): assert ssm_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat) assert ssm_obj.nuisance_targets['ml_pi'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.nuisance_targets['ml_g0'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.nuisance_targets['ml_g1'].shape == (n_obs, n_rep, n_treat) + assert apo_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat) + @pytest.mark.ci def test_nuisance_loss(): @@ -394,100 +410,55 @@ def test_nuisance_loss(): assert ssm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) assert ssm_obj.nuisance_loss['ml_pi'].shape == (n_rep, n_treat) + assert apo_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat) + assert apo_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat) + assert apo_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) -@pytest.mark.ci -def test_sensitivity(): - var_keys = 
['sigma2', 'nu2'] - score_keys = ['psi_sigma2', 'psi_nu2', 'riesz_rep'] - benchmarks = {'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': ["test1", "test2"]} +def _test_sensitivity_return_types(dml_obj, n_rep, n_treat, benchmarking_set): + assert isinstance(dml_obj.sensitivity_elements, dict) + for key in ['sigma2', 'nu2']: + assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray) + assert dml_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) + for key in ['psi_sigma2', 'psi_nu2', 'riesz_rep']: + assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray) + assert dml_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) - # PLR - assert isinstance(plr_obj.sensitivity_elements, dict) - for key in var_keys: - assert isinstance(plr_obj.sensitivity_elements[key], np.ndarray) - assert plr_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) - for key in score_keys: - assert isinstance(plr_obj.sensitivity_elements[key], np.ndarray) - assert plr_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) - - assert isinstance(plr_obj.sensitivity_summary, str) - plr_obj.sensitivity_analysis() - assert isinstance(plr_obj.sensitivity_summary, str) - assert isinstance(plr_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) - assert isinstance(plr_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(plr_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance( - plr_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), - tuple) - plr_benchmark = plr_obj.sensitivity_benchmark(benchmarking_set=["X1"]) - assert isinstance(plr_benchmark, pd.DataFrame) + assert isinstance(dml_obj.sensitivity_summary, str) + dml_obj.sensitivity_analysis() + assert isinstance(dml_obj.sensitivity_summary, str) + assert isinstance(dml_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) + benchmarks = {'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': ["test1", "test2"]} + assert isinstance(dml_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - # DID - assert isinstance(irm_obj.sensitivity_elements, dict) - for key in var_keys: - assert isinstance(irm_obj.sensitivity_elements[key], np.ndarray) - assert irm_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) - for key in score_keys: - assert isinstance(irm_obj.sensitivity_elements[key], np.ndarray) - assert irm_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) - - assert isinstance(irm_obj.sensitivity_summary, str) - irm_obj.sensitivity_analysis() - assert isinstance(irm_obj.sensitivity_summary, str) - assert isinstance(irm_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) - assert isinstance(irm_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(irm_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) + assert isinstance(dml_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) assert isinstance( - irm_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), + dml_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple ) - irm_benchmark = irm_obj.sensitivity_benchmark(benchmarking_set=["X1"]) - assert isinstance(irm_benchmark, pd.DataFrame) + benchmark = 
dml_obj.sensitivity_benchmark(benchmarking_set=benchmarking_set) + assert isinstance(benchmark, pd.DataFrame) + + return + + +@pytest.mark.ci +def test_sensitivity(): + + # PLR + _test_sensitivity_return_types(plr_obj, n_rep, n_treat, benchmarking_set=["X1"]) + + # IRM + _test_sensitivity_return_types(irm_obj, n_rep, n_treat, benchmarking_set=["X1"]) # DID - assert isinstance(did_obj.sensitivity_elements, dict) - for key in var_keys: - assert isinstance(did_obj.sensitivity_elements[key], np.ndarray) - assert did_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) - for key in score_keys: - assert isinstance(did_obj.sensitivity_elements[key], np.ndarray) - assert did_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) - - assert isinstance(did_obj.sensitivity_summary, str) - did_obj.sensitivity_analysis() - assert isinstance(did_obj.sensitivity_summary, str) - assert isinstance(did_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) - assert isinstance(did_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(did_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance( - did_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), - tuple - ) - did_benchmark = did_obj.sensitivity_benchmark(benchmarking_set=['Z1']) - assert isinstance(did_benchmark, pd.DataFrame) + _test_sensitivity_return_types(did_obj, n_rep, n_treat, benchmarking_set=["Z1"]) # DIDCS - assert isinstance(did_cs_obj.sensitivity_elements, dict) - for key in var_keys: - assert isinstance(did_cs_obj.sensitivity_elements[key], np.ndarray) - assert did_cs_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) - for key in score_keys: - assert isinstance(did_cs_obj.sensitivity_elements[key], np.ndarray) - assert did_cs_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) - - assert isinstance(did_cs_obj.sensitivity_summary, str) - did_cs_obj.sensitivity_analysis() - assert isinstance(did_cs_obj.sensitivity_summary, str) - assert isinstance(did_cs_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) - assert isinstance(did_cs_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(did_cs_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance( - did_cs_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), - tuple - ) - did_cs_benchmark = did_cs_obj.sensitivity_benchmark(benchmarking_set=['Z1']) - assert isinstance(did_cs_benchmark, pd.DataFrame) + _test_sensitivity_return_types(did_cs_obj, n_rep, n_treat, benchmarking_set=["Z1"]) + + # APO + _test_sensitivity_return_types(apo_obj, n_rep, n_treat, benchmarking_set=["X1"]) @pytest.mark.ci From fb2dfa63f177d0cf78a6f7fd50a5cf85569ba06b Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 07:32:57 +0200 Subject: [PATCH 82/98] fix format --- doubleml/tests/test_return_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 79b5fc933..a9014d089 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -172,7 +172,7 @@ def test_return_types(dml_obj, cls): ssm_obj.bootstrap(n_rep_boot=n_rep_boot) apo_obj = DoubleMLAPO(dml_data_irm, Lasso(), 
LogisticRegression(), treatment_level=0, - n_rep=n_rep, n_folds=n_folds) + n_rep=n_rep, n_folds=n_folds) apo_obj.fit() apo_obj.bootstrap(n_rep_boot=n_rep_boot) From f13ba784ea034713dad05c86fdd9cdc4e711b33c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 08:01:24 +0200 Subject: [PATCH 83/98] add sensitivity_summary to framework obj --- doubleml/double_ml_framework.py | 49 ++++++++++++++++++++ doubleml/tests/test_framework_sensitivity.py | 23 +++++++++ 2 files changed, 72 insertions(+) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 633d6464e..d6c54042e 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -225,6 +225,55 @@ def summary(self): self.pvals, ci, self._treatment_names) return df_summary + @property + def sensitivity_summary(self): + """ + Returns a summary for the sensitivity analysis after calling :meth:`sensitivity_analysis`. + + Returns + ------- + res : str + Summary for the sensitivity analysis. + """ + header = '================== Sensitivity Analysis ==================\n' + if self.sensitivity_params is None: + res = header + 'Apply sensitivity_analysis() to generate sensitivity_summary.' + else: + sig_level = f'Significance Level: level={self.sensitivity_params["input"]["level"]}\n' + scenario_params = f'Sensitivity parameters: cf_y={self.sensitivity_params["input"]["cf_y"]}; ' \ + f'cf_d={self.sensitivity_params["input"]["cf_d"]}, ' \ + f'rho={self.sensitivity_params["input"]["rho"]}' + + theta_and_ci_col_names = ['CI lower', 'theta lower', ' theta', 'theta upper', 'CI upper'] + theta_and_ci = np.transpose(np.vstack((self.sensitivity_params['ci']['lower'], + self.sensitivity_params['theta']['lower'], + self.thetas, + self.sensitivity_params['theta']['upper'], + self.sensitivity_params['ci']['upper']))) + df_theta_and_ci = pd.DataFrame(theta_and_ci, + columns=theta_and_ci_col_names, + index=self.treatment_names) + theta_and_ci_summary = str(df_theta_and_ci) + + rvs_col_names = ['H_0', 'RV (%)', 'RVa (%)'] + rvs = np.transpose(np.vstack((self.sensitivity_params['rv'], + self.sensitivity_params['rva']))) * 100 + + df_rvs = pd.DataFrame(np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)), + columns=rvs_col_names, + index=self.treatment_names) + rvs_summary = str(df_rvs) + + res = header + \ + '\n------------------ Scenario ------------------\n' + \ + sig_level + scenario_params + '\n' + \ + '\n------------------ Bounds with CI ------------------\n' + \ + theta_and_ci_summary + '\n' + \ + '\n------------------ Robustness Values ------------------\n' + \ + rvs_summary + + return res + def __add__(self, other): if isinstance(other, DoubleMLFramework): diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py index 5b1be8bd5..044d89d22 100644 --- a/doubleml/tests/test_framework_sensitivity.py +++ b/doubleml/tests/test_framework_sensitivity.py @@ -43,6 +43,7 @@ def dml_framework_sensitivity_fixture(n_rep, generate_data_simple): 'dml_obj': dml_irm_obj, 'dml_obj_2': dml_irm_obj_2, 'dml_framework_obj': dml_framework_obj, + 'dml_framework_obj_2': dml_framework_obj_2, 'dml_framework_obj_add_obj': dml_framework_obj_add_obj, 'dml_framework_obj_sub_obj': dml_framework_obj_sub_obj, 'dml_framework_obj_mul_obj': dml_framework_obj_mul_obj, @@ -59,6 +60,7 @@ def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture): n_obs = 
dml_framework_sensitivity_fixture['dml_framework_obj'].n_obs object_list = ['dml_framework_obj', + 'dml_framework_obj_2', 'dml_framework_obj_add_obj', 'dml_framework_obj_sub_obj', 'dml_framework_obj_mul_obj'] @@ -81,3 +83,24 @@ def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture): for key in score_keys: assert dml_framework_sensitivity_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ (n_obs, 2, n_rep) + + +@pytest.mark.ci +def test_dml_framework_sensitivity_summary(dml_framework_sensitivity_fixture): + # summary without sensitivity analysis + sensitivity_summary = dml_framework_sensitivity_fixture['dml_framework_obj_2'].sensitivity_summary + substring = 'Apply sensitivity_analysis() to generate sensitivity_summary.' + assert substring in sensitivity_summary + + # summary with sensitivity analysis + sensitivity_summary = dml_framework_sensitivity_fixture['dml_framework_obj'].sensitivity_summary + assert isinstance(sensitivity_summary, str) + substrings = [ + '\n------------------ Scenario ------------------\n', + '\n------------------ Bounds with CI ------------------\n', + '\n------------------ Robustness Values ------------------\n', + 'Significance Level: level=', + 'Sensitivity parameters: cf_y=' + ] + for substring in substrings: + assert substring in sensitivity_summary From e0715c8ceb2d8c63c20104146c920d8c8d831148 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 08:14:14 +0200 Subject: [PATCH 84/98] move sensitivity_summary to DoubleMLFramework class --- doubleml/double_ml.py | 41 +++------------------- doubleml/irm/apos.py | 16 +++++++++ doubleml/irm/tests/test_apos_exceptions.py | 3 ++ doubleml/tests/test_exceptions.py | 7 ++++ 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 4bb88dfba..71f8b4418 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1436,44 +1436,11 @@ def sensitivity_summary(self): res : str Summary for the sensitivity analysis. """ - header = '================== Sensitivity Analysis ==================\n' - if self.sensitivity_params is None: - res = header + 'Apply sensitivity_analysis() to generate sensitivity_summary.' 
+ if self._framework is None: + raise ValueError('Apply sensitivity_analysis() before sensitivity_summary.') else: - sig_level = f'Significance Level: level={self.sensitivity_params["input"]["level"]}\n' - scenario_params = f'Sensitivity parameters: cf_y={self.sensitivity_params["input"]["cf_y"]}; ' \ - f'cf_d={self.sensitivity_params["input"]["cf_d"]}, ' \ - f'rho={self.sensitivity_params["input"]["rho"]}' - - theta_and_ci_col_names = ['CI lower', 'theta lower', ' theta', 'theta upper', 'CI upper'] - theta_and_ci = np.transpose(np.vstack((self.sensitivity_params['ci']['lower'], - self.sensitivity_params['theta']['lower'], - self.coef, - self.sensitivity_params['theta']['upper'], - self.sensitivity_params['ci']['upper']))) - df_theta_and_ci = pd.DataFrame(theta_and_ci, - columns=theta_and_ci_col_names, - index=self._dml_data.d_cols) - theta_and_ci_summary = str(df_theta_and_ci) - - rvs_col_names = ['H_0', 'RV (%)', 'RVa (%)'] - rvs = np.transpose(np.vstack((self.sensitivity_params['rv'], - self.sensitivity_params['rva']))) * 100 - - df_rvs = pd.DataFrame(np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)), - columns=rvs_col_names, - index=self._dml_data.d_cols) - rvs_summary = str(df_rvs) - - res = header + \ - '\n------------------ Scenario ------------------\n' + \ - sig_level + scenario_params + '\n' + \ - '\n------------------ Bounds with CI ------------------\n' + \ - theta_and_ci_summary + '\n' + \ - '\n------------------ Robustness Values ------------------\n' + \ - rvs_summary - - return res + sensitivity_summary = self._framework.sensitivity_summary + return sensitivity_summary def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0, include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100): diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index e04f55924..c44198bc9 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -323,6 +323,22 @@ def summary(self): self.pval, ci, self._treatment_levels) return df_summary + @property + def sensitivity_summary(self): + """ + Returns a summary for the sensitivity analysis after calling :meth:`sensitivity_analysis`. + + Returns + ------- + res : str + Summary for the sensitivity analysis. + """ + if self._framework is None: + raise ValueError('Apply sensitivity_analysis() before sensitivity_summary.') + else: + sensitivity_summary = self._framework.sensitivity_summary + return sensitivity_summary + def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): """ Estimate DoubleMLAPOS models. diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index 9d17447b7..7b3c8bdb0 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -104,6 +104,9 @@ def test_apos_exception_properties_and_methods(): msg = r'Apply fit\(\) before sensitivity_plot\(\).' with pytest.raises(ValueError, match=msg): dml_obj.sensitivity_plot() + msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.' 
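+    # sensitivity_summary is a property, so the ValueError is raised already on attribute access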
+ with pytest.raises(ValueError, match=msg): + dml_obj.sensitivity_summary @pytest.mark.ci diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index b8b690f34..97b8dac41 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1107,6 +1107,13 @@ def test_doubleml_sensitivity_inputs(): dml_irm.sensitivity_analysis() +def test_doubleml_sensitivity_summary(): + dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.' + with pytest.raises(ValueError, match=msg): + _ = dml_irm.sensitivity_summary() + + @pytest.mark.ci def test_doubleml_sensitivity_benchmark(): dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) From 04fae5ed22db07ae9dd5c427e98d796471a270e2 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:57:55 +0200 Subject: [PATCH 85/98] fix gain statistics for multiple treatments --- doubleml/utils/gain_statistics.py | 4 ++-- .../tests/test_exceptions_gain_statistics.py | 24 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index 3c50d084a..5a05e1b2a 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -56,7 +56,7 @@ def gain_statistics(dml_long, dml_short): if not isinstance(dml_short.all_coef, np.ndarray): raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") - expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[2], dml_long.sensitivity_elements['sigma2'].shape[1]) + expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[1], dml_long.sensitivity_elements['sigma2'].shape[2]) if dml_long.all_coef.shape != expected_shape: raise ValueError("dml_long.all_coef does not contain the necessary coefficients. Expected shape: " + str(expected_shape)) @@ -83,7 +83,7 @@ def gain_statistics(dml_long, dml_short): cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0) # change in estimates (slightly different to paper) - all_delta_theta = np.transpose(dml_short.all_coef - dml_long.all_coef) + all_delta_theta = dml_short.all_coef - dml_long.all_coef delta_theta = np.median(all_delta_theta, axis=0) # degree of adversity diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py index 805a84ed0..c4f3d3af3 100644 --- a/doubleml/utils/tests/test_exceptions_gain_statistics.py +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -22,13 +22,13 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) # incorrect types dml_incorrect = test_dml_class( sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." 
with pytest.raises(TypeError, match=msg): @@ -43,7 +43,7 @@ def test_doubleml_exception_data(): sensitivity_elements={ 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long does not contain the necessary sensitivity elements\. Required keys are: \['sigma2', 'nu2'\]" with pytest.raises(ValueError, match=msg): @@ -58,7 +58,7 @@ def test_doubleml_exception_data(): 'sigma2': {}, 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\." with pytest.raises(TypeError, match=msg): @@ -72,7 +72,7 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': {} }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\." with pytest.raises(TypeError, match=msg): @@ -87,7 +87,7 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)), 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = (r"dml_long does not contain the necessary sensitivity elements\. " r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.") @@ -103,7 +103,7 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = (r"dml_long does not contain the necessary sensitivity elements\. " r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.") @@ -120,7 +120,7 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)), 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " msg += r"Shapes of sigma2 are: \(1, 4, 5\) and \(1, 3, 5\)" @@ -136,7 +136,7 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)) }, - all_coef=np.random.normal(size=(n_coef, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef)) ) msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " msg += r"Shapes of nu2 are: \(1, 4, 5\) and \(1, 3, 5\)" @@ -168,11 +168,11 @@ def test_doubleml_exception_data(): 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, - all_coef=np.random.normal(size=(n_coef + 1, n_rep)) + all_coef=np.random.normal(size=(n_rep, n_coef + 1)) ) - msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(5, 3\)" + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)" with pytest.raises(ValueError, match=msg): _ = gain_statistics(dml_incorrect, dml_correct) - msg = r"dml_short\.all_coef does not contain the necessary coefficients\. 
Expected shape: \(5, 3\)" + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)" with pytest.raises(ValueError, match=msg): _ = gain_statistics(dml_correct, dml_incorrect) From 53b4e4acc00280aa1faca16002418c1ea9b2ab11 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:44:01 +0200 Subject: [PATCH 86/98] Update gain_statistics.py --- doubleml/utils/gain_statistics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index 5a05e1b2a..bfd388455 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -79,12 +79,12 @@ def gain_statistics(dml_long, dml_short): # Gain statistics all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1) all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1) - cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0) - cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0) + cf_y_benchmark = np.median(all_cf_y_benchmark, axis=1) + cf_d_benchmark = np.median(all_cf_d_benchmark, axis=1) # change in estimates (slightly different to paper) all_delta_theta = dml_short.all_coef - dml_long.all_coef - delta_theta = np.median(all_delta_theta, axis=0) + delta_theta = np.median(all_delta_theta, axis=1) # degree of adversity var_g = var_y_residuals_short - var_y_residuals_long @@ -97,7 +97,7 @@ def gain_statistics(dml_long, dml_short): where=denom != 0), 0.0, 1.0) all_rho_benchmark = np.multiply(rho_values, rho_sign) - rho_benchmark = np.median(all_rho_benchmark, axis=0) + rho_benchmark = np.median(all_rho_benchmark, axis=1) benchmark_dict = { "cf_y": cf_y_benchmark, "cf_d": cf_d_benchmark, From 9911b69b8b30e08638c2fa3c1510af5f5447d314 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:44:14 +0200 Subject: [PATCH 87/98] add benchmarking to apos --- doubleml/irm/apos.py | 41 ++++++++++++++++++++++++++ doubleml/irm/tests/test_apos.py | 51 +++++++++++++++++++++------------ 2 files changed, 73 insertions(+), 19 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index c44198bc9..c6337100e 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import copy from collections.abc import Iterable from sklearn.base import clone @@ -14,6 +15,7 @@ from ..utils.resampling import DoubleMLResampling from ..utils._descriptive import generate_summary from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_sample_splitting +from ..utils.gain_statistics import gain_statistics class DoubleMLAPOS: @@ -562,6 +564,45 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, return fig + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): + """ + Computes a benchmark for a given set of features. + Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates. + Returns + ------- + benchmark_results : pandas.DataFrame + Benchmark results. + """ + x_list_long = self._dml_data.x_cols + + # input checks + if self.sensitivity_elements is None: + raise NotImplementedError(f'Sensitivity analysis not yet implemented for {self.__class__.__name__}.') + if not isinstance(benchmarking_set, list): + raise TypeError('benchmarking_set must be a list. 
' + f'{str(benchmarking_set)} of type {type(benchmarking_set)} was passed.') + if len(benchmarking_set) == 0: + raise ValueError('benchmarking_set must not be empty.') + if not set(benchmarking_set) <= set(x_list_long): + raise ValueError(f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. " + f'{str(benchmarking_set)} was passed.') + if fit_args is not None and not isinstance(fit_args, dict): + raise TypeError('fit_args must be a dict. ' + f'{str(fit_args)} of type {type(fit_args)} was passed.') + + # refit short form of the model + x_list_short = [x for x in x_list_long if x not in benchmarking_set] + dml_short = copy.deepcopy(self) + dml_short._dml_data.x_cols = x_list_short + if fit_args is not None: + dml_short.fit(**fit_args) + else: + dml_short.fit() + + benchmark_dict = gain_statistics(dml_long=self, dml_short=dml_short) + df_benchmark = pd.DataFrame(benchmark_dict, index=self.treatment_levels) + return df_benchmark + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 9ebc7591e..92a372ff1 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -75,7 +75,7 @@ def learner(request): @pytest.fixture(scope='module', - params=[1]) + params=[1, 5]) def n_rep(request): return request.param @@ -148,6 +148,7 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen clone(learner[0]), clone(learner[1]), treatment_levels=treatment_levels, all_smpls=all_smpls, + n_rep=n_rep, score='APO', trimming_rule='truncate', normalize_ipw=normalize_ipw, @@ -176,23 +177,24 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen 'apos_model': dml_obj, 'unfitted_apos_model': unfitted_apos_model } + if n_rep == 1: + for bootstrap in boot_methods: + np.random.seed(42) + boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels, + all_smpls, n_rep, bootstrap, n_rep_boot) - for bootstrap in boot_methods: - np.random.seed(42) - boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels, - all_smpls, n_rep, bootstrap, n_rep_boot) + np.random.seed(42) + dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) - np.random.seed(42) - dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat + res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat - res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat - res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat - - ci = dml_obj.confint(joint=True, level=0.95) - ci_manual = confint_manual(res_manual['apos'], res_manual['se'], treatment_levels, - boot_t_stat=boot_t_stat, joint=True, level=0.95) - res_dict['boot_ci_' + bootstrap] = ci.to_numpy() - res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() + ci = dml_obj.confint(joint=True, level=0.95) + ci_manual = confint_manual( + res_manual['apos'], res_manual['se'], treatment_levels, + boot_t_stat=boot_t_stat, joint=True, level=0.95) + res_dict['boot_ci_' + bootstrap] = ci.to_numpy() + res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy() # causal contrasts if len(treatment_levels) > 1: @@ -216,6 +218,8 @@ def test_dml_apos_coef(dml_apos_fixture): @pytest.mark.ci def test_dml_apos_se(dml_apos_fixture): + if dml_apos_fixture['n_rep'] != 1: + pytest.skip("Skipping test as n_rep is not 1") assert 
np.allclose(dml_apos_fixture['se'], dml_apos_fixture['se_manual'], rtol=1e-9, atol=1e-9) @@ -226,6 +230,8 @@ def test_dml_apos_se(dml_apos_fixture): @pytest.mark.ci def test_dml_apos_boot(dml_apos_fixture): + if dml_apos_fixture['n_rep'] != 1: + pytest.skip("Skipping test as n_rep is not 1") for bootstrap in dml_apos_fixture['boot_methods']: assert np.allclose(dml_apos_fixture['boot_t_stat_' + bootstrap], dml_apos_fixture['boot_t_stat_' + bootstrap + '_manual'], @@ -234,6 +240,8 @@ def test_dml_apos_boot(dml_apos_fixture): @pytest.mark.ci def test_dml_apos_ci(dml_apos_fixture): + if dml_apos_fixture['n_rep'] != 1: + pytest.skip("Skipping test as n_rep is not 1") for bootstrap in dml_apos_fixture['boot_methods']: assert np.allclose(dml_apos_fixture['ci'], dml_apos_fixture['ci_manual'], @@ -260,18 +268,23 @@ def test_doubleml_apos_return_types(dml_apos_fixture): assert isinstance(dml_apos_fixture['causal_contrast_single'], dml.DoubleMLFramework) assert isinstance(dml_apos_fixture['causal_contrast_multiple'], dml.DoubleMLFramework) + benchmark = dml_apos_fixture['apos_model'].sensitivity_benchmark(benchmarking_set=['x1']) + assert isinstance(benchmark, pd.DataFrame) + @pytest.mark.ci def test_doubleml_apos_causal_contrast(dml_apos_fixture): if dml_apos_fixture['n_treatment_levels'] == 1: pytest.skip("Skipping test as n_treatment_levels is 1") - acc_single = dml_apos_fixture['coef'][1:] - dml_apos_fixture['coef'][0] - assert np.allclose(dml_apos_fixture['causal_contrast_single'].thetas, + acc_single = dml_apos_fixture['apos_model'].all_coef[1:, ] - dml_apos_fixture['apos_model'].all_coef[0, ] + assert np.allclose(dml_apos_fixture['causal_contrast_single'].all_thetas, acc_single, rtol=1e-9, atol=1e-9) - acc_multiple = np.append(acc_single, dml_apos_fixture['coef'][2] - dml_apos_fixture['coef'][1]) - assert np.allclose(dml_apos_fixture['causal_contrast_multiple'].thetas, + acc_multiple = np.append(acc_single, + dml_apos_fixture['apos_model'].all_coef[2:3, ] - dml_apos_fixture['apos_model'].all_coef[1:2, ], + axis=0) + assert np.allclose(dml_apos_fixture['causal_contrast_multiple'].all_thetas, acc_multiple, rtol=1e-9, atol=1e-9) From 31b386a948ebb9b7f738b0d952fd0d84db8d25b3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:11:56 +0200 Subject: [PATCH 88/98] add _all_treatments to apos --- doubleml/irm/apos.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index c6337100e..2727c4e22 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -39,6 +39,7 @@ def __init__(self, self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData) self._check_data(self._dml_data) + self._all_treatments = np.unique(self._dml_data.d) self._treatment_levels = self._check_treatment_levels(treatment_levels) self._n_treatment_levels = len(self._treatment_levels) @@ -739,7 +740,7 @@ def _check_treatment_levels(self, treatment_levels): treatment_level_list = [treatment_levels] else: treatment_level_list = [t_lvl for t_lvl in treatment_levels] - is_d_subset = set(treatment_level_list).issubset(set(np.unique(self._dml_data.d))) + is_d_subset = set(treatment_level_list).issubset(set(self._all_treatments)) if not is_d_subset: raise ValueError('Invalid reference_levels. 
reference_levels has to be an iterable subset or '
                             'a single element of the unique treatment levels in the data.')

From 738edf34282f7b489a3c9c53ed18931ddcb42b26 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Wed, 31 Jul 2024 13:13:00 +0200
Subject: [PATCH 89/98] add exception tests for external predictions

---
 doubleml/irm/apos.py                       | 70 +++++++++++++++++++---
 doubleml/irm/tests/test_apos_exceptions.py | 44 ++++++++++++++
 2 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 2727c4e22..27f4d5c12 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -39,9 +39,12 @@ def __init__(self,
         self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData)
         self._check_data(self._dml_data)
 
-        self._all_treatments = np.unique(self._dml_data.d)
+        self._all_treatment_levels = np.unique(self._dml_data.d)
+
         self._treatment_levels = self._check_treatment_levels(treatment_levels)
         self._n_treatment_levels = len(self._treatment_levels)
+        # Check if there are elements in self._all_treatment_levels that are not in self.treatment_levels
+        self._add_treatment_levels = [t for t in self._all_treatment_levels if t not in self._treatment_levels]
 
         self._normalize_ipw = normalize_ipw
         self._n_folds = n_folds
@@ -366,8 +369,14 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
             to analyze the fitted models or extract information like variable importance.
             Default is ``False``.
 
-        external_predictions : None
-            Not implemented for DoubleMLAPOS.
+        external_predictions : dict or None
+            A nested dictionary where the keys correspond the the treatment levels and contain predictions according to each
+            treatment level. The values have to be dictionaries which containkeys ``'ml_g'`` and ``'ml_m'``.
+            The predictions for ``'ml_m'`` are passed directly to the DoubleMLAPO model,
+            whereas the predictions for ``'ml_g'`` are used to compute predictions for ``'ml_g1'`` and ``'ml_g0'``.
+            If the treatment levels do not cover all levels in the data, combined predictions for ``'ml_g'`` have
+            to be provided under the key ``'add_treatment_levels'``.
+            Default is `None`.
Returns ------- @@ -375,12 +384,20 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ """ if external_predictions is not None: - raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") + self._check_external_predictions(external_predictions) + ext_pred_dict = self._recompute_external_predictions(self) # parallel estimation of the models parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') - fitted_models = parallel(delayed(self._fit_model)(i_level, n_jobs_cv, store_predictions, store_models) - for i_level in range(self.n_treatment_levels)) + fitted_models = parallel( + delayed(self._fit_model)( + i_level, + n_jobs_cv, + store_predictions, + store_models, + ext_pred_dict) + for i_level in range(self.n_treatment_levels) + ) # combine the estimates and scores framework_list = [None] * self.n_treatment_levels @@ -728,10 +745,15 @@ def causal_contrast(self, reference_levels): acc.treatment_names = all_treatment_names return acc - def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False): + def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions_dict=None): model = self.modellist[i_level] - model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models) + if external_predictions_dict is not None: + external_predictions = external_predictions_dict[self.treatment_levels[i_level]] + else: + external_predictions = None + model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models, + external_predictions=external_predictions) return model def _check_treatment_levels(self, treatment_levels): @@ -740,7 +762,7 @@ def _check_treatment_levels(self, treatment_levels): treatment_level_list = [treatment_levels] else: treatment_level_list = [t_lvl for t_lvl in treatment_levels] - is_d_subset = set(treatment_level_list).issubset(set(self._all_treatments)) + is_d_subset = set(treatment_level_list).issubset(set(self._all_treatment_levels)) if not is_d_subset: raise ValueError('Invalid reference_levels. reference_levels has to be an iterable subset or ' 'a single element of the unique treatment levels in the data.') @@ -753,6 +775,36 @@ def _check_data(self, obj_dml_data): raise ValueError('The data must not contain instrumental variables.') return + def _check_external_predictions(self, external_predictions): + expected_keys = self.treatment_levels + if len(self._add_treatment_levels) > 0: + expected_keys += ['add_treatment_levels'] + if not isinstance(external_predictions, dict): + raise TypeError('external_predictions must be a dictionary. ' + + f'Object of type {type(external_predictions)} passed.') + + if not set(external_predictions.keys()) == set(expected_keys): + raise ValueError('external_predictions must contain predictions for all treatment levels. ' + + f'Expected keys: {set(expected_keys)}. 
' +
+                             f'Passed keys: {set(external_predictions.keys())}.')
+
+        contains_ml_g = ['ml_g' in external_predictions[treatment_level] for treatment_level in self.treatment_levels]
+        if not all(contains_ml_g) and not all([not contains for contains in contains_ml_g]):
+            raise ValueError('The predictions for ml_g have to be provided for all treatment levels or not at all.')
+        return
+
+    def _recompute_external_predictions(self, external_predictions):
+        ext_pred_dict = {}
+        for i_level in range(self.n_treatment_levels):
+            ext_pred_dict[self.treatment_levels[i_level]] = {
+                'ml_g1': external_predictions[self.treatment_levels[i_level]]['ml_g'],
+                'ml_m': external_predictions[self.treatment_levels[i_level]]['ml_m']
+            }
+            ext_pred_dict[self.treatment_levels[i_level]]['ml_g0'] = \
+                external_predictions[self.treatment_levels[i_level]]['ml_g']
+
+        return ext_pred_dict
+
     def _initialize_weights(self, weights):
         if weights is None:
             weights = np.ones(self._dml_data.n_obs)
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 7b3c8bdb0..752f77240 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -109,6 +109,50 @@ def test_apos_exception_properties_and_methods():
         dml_obj.sensitivity_summary
 
 
+@pytest.mark.ci
+def test_apos_exception_ext_pred():
+    dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0)
+    external_predictions = [0, 1]
+    msg = r"external_predictions must be a dictionary. Object of type <class 'list'> passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_obj.fit(external_predictions=external_predictions)
+
+    # test with a level subset
+    external_predictions = {
+        0: "dummy",
+        1: "dummy"
+    }
+    msg = (
+        r"external_predictions must contain predictions for all treatment levels\. "
+        r"Expected keys: \{0, 'add_treatment_levels'\}\. "
+        r"Passed keys: \{0, 1\}\."
+    )
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.fit(external_predictions=external_predictions)
+
+    external_predictions = {
+        0: {"ml_g": "dummy"},
+        'add_treatment_levels': {"ml_m": "dummy"}
+    }
+    msg = "The predictions for ml_g have to be provided for all treatment levels or not at all."
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.fit(external_predictions=external_predictions)
+
+    # test with all levels
+    dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1, 2, 3])
+    external_predictions = {
+        0: "dummy",
+        1: "dummy"
+    }
+    msg = (
+        r"external_predictions must contain predictions for all treatment levels\. "
+        r"Expected keys: \{0, 1, 2, 3\}\. "
+        r"Passed keys: \{0, 1\}\."
+    )
+    with pytest.raises(ValueError, match=msg):
+        dml_obj.fit(external_predictions=external_predictions)
+
+
 @pytest.mark.ci
 def test_causal_contrast_exceptions():
     msg = r"Apply fit\(\) before causal_contrast\(\)."
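
A minimal usage sketch of the workflow exercised by the tests above, placed here for orientation; it assumes the data helper and the DoubleMLAPOS API as they stand at this point in the series, and the learners and sample size are illustrative placeholders, not part of any patch:

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from doubleml import DoubleMLAPOS, DoubleMLData
    from doubleml.datasets import make_irm_data_discrete_treatments

    # Discrete-treatment data with levels {0, 1, 2, 3}, mirroring the exception tests.
    data = make_irm_data_discrete_treatments(n_obs=500)
    df = pd.DataFrame(
        np.column_stack((data['y'], data['d'], data['x'])),
        columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
    )
    dml_data = DoubleMLData(df, 'y', 'd')

    dml_obj = DoubleMLAPOS(dml_data, LinearRegression(), LogisticRegression(),
                           treatment_levels=[0, 1, 2, 3])
    dml_obj.fit()  # fit() must come first; otherwise causal_contrast() raises
    causal_contrast = dml_obj.causal_contrast(reference_levels=0)
    print(causal_contrast.thetas)  # contrasts of levels 1, 2, 3 against level 0
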
From e18aba113a63de76f2de5ae6b26d98de7d6fa7a9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:26:54 +0200 Subject: [PATCH 90/98] add exception for multiple treatment variables in apo setting --- doubleml/irm/apo.py | 4 ++++ doubleml/irm/tests/test_apo_exceptions.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index f52750709..b0f4f3817 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -367,6 +367,10 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res def _check_data(self, obj_dml_data): + if len(obj_dml_data.d_cols) > 1: + raise ValueError('Only one treatment variable is allowed. ' + + f'Got {len(obj_dml_data.d_cols)} treatment variables.') + if obj_dml_data.z_cols is not None: raise ValueError('Incompatible data. ' + ' and '.join(obj_dml_data.z_cols) + diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index 970ba1900..31fa6b447 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -25,6 +25,11 @@ def test_apo_exception_data(): with pytest.raises(TypeError, match=msg): _ = DoubleMLAPO(pd.DataFrame(), ml_g, ml_m, treatment_level=0) + msg = 'Only one treatment variable is allowed. Got 2 treatment variables.' + with pytest.raises(ValueError, match=msg): + dml_data_multiple = DoubleMLData(df_apo, 'y', ['d', 'x1']) + _ = DoubleMLAPO(dml_data_multiple, ml_g, ml_m, treatment_level=0) + dml_data_z = make_iivm_data() msg = r'Incompatible data. z have been set as instrumental variable\(s\).' with pytest.raises(ValueError, match=msg): From bfa57dafc84825b728c16cd54237be338712ea11 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:32:00 +0200 Subject: [PATCH 91/98] add simple external predictions to apos --- doubleml/irm/apos.py | 22 ++-- .../tests/test_apos_external_predictions.py | 103 ++++++++++++++++++ 2 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 doubleml/irm/tests/test_apos_external_predictions.py diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 27f4d5c12..1b5f7e0b7 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -385,7 +385,9 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ if external_predictions is not None: self._check_external_predictions(external_predictions) - ext_pred_dict = self._recompute_external_predictions(self) + ext_pred_dict = self._recompute_external_predictions(external_predictions) + else: + ext_pred_dict = None # parallel estimation of the models parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') @@ -794,14 +796,16 @@ def _check_external_predictions(self, external_predictions): return def _recompute_external_predictions(self, external_predictions): - ext_pred_dict = {} - for i_level in range(self.n_treatment_levels): - ext_pred_dict[self.treatment_levels[i_level]] = { - 'ml_g1': external_predictions[self.treatment_levels[i_level]]['ml_g'], - 'ml_m': external_predictions[self.treatment_levels[i_level]]['ml_m'] - } - ext_pred_dict[self.treatment_levels[i_level]]['ml_g0'] = \ - external_predictions[self.treatment_levels[i_level]]['ml_g'] + d_col = self._dml_data.d_cols[0] + ext_pred_dict = {treatment_level: {d_col: {}} for treatment_level in self.treatment_levels} + for treatment_level in self.treatment_levels: + if 
"ml_g1" in external_predictions[treatment_level]: + ext_pred_dict[treatment_level][d_col]['ml_g1'] = external_predictions[treatment_level]['ml_g1'] + if "ml_m" in external_predictions[treatment_level]: + ext_pred_dict[treatment_level][d_col]['ml_m'] = external_predictions[treatment_level]['ml_m'] + if "ml_g0" in external_predictions[treatment_level]: + ext_pred_dict[treatment_level][d_col]['ml_g0'] = external_predictions[treatment_level]['ml_g0'] + # TODO: Combine the models return ext_pred_dict diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py new file mode 100644 index 000000000..1aa8d4ba1 --- /dev/null +++ b/doubleml/irm/tests/test_apos_external_predictions.py @@ -0,0 +1,103 @@ +import pytest +import numpy as np +import pandas as pd +import math + +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLAPOS, DoubleMLData +from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier + +from ...tests._utils import draw_smpls + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[[0, 1, 2, 3], [0, 1]]) +def treatment_levels(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ext): + score = "APO" + ext_predictions = { + treatment_level: {} for treatment_level in treatment_levels + } + + np.random.seed(3141) + n_obs = 500 + data_apo = make_irm_data_discrete_treatments(n_obs=n_obs) + df_apo = pd.DataFrame( + np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])), + columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])] + ) + + dml_data = DoubleMLData(df_apo, 'y', 'd') + d = data_apo['d'] + all_smpls = draw_smpls(n_obs, n_folds=5, n_rep=n_rep, groups=d) + + kwargs = { + "obj_dml_data": dml_data, + "score": score, + "treatment_levels": treatment_levels, + "n_rep": n_rep, + "draw_sample_splitting": False + } + + dml_obj = DoubleMLAPOS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_obj.set_sample_splitting(all_smpls=all_smpls) + + np.random.seed(3141) + dml_obj.fit(store_predictions=True) + + if set_ml_m_ext: + for i_treatment_level, treatment_level in enumerate(treatment_levels): + ext_predictions[treatment_level]["ml_m"] = dml_obj.modellist[i_treatment_level].predictions["ml_m"][:, :, 0] + ml_m = DMLDummyClassifier() + else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: + for i_treatment_level, treatment_level in enumerate(treatment_levels): + ext_predictions[treatment_level]["ml_g0"] = dml_obj.modellist[i_treatment_level].predictions["ml_g0"][:, :, 0] + ext_predictions[treatment_level]["ml_g1"] = dml_obj.modellist[i_treatment_level].predictions["ml_g1"][:, :, 0] + ml_g = DMLDummyRegressor() + else: + ml_g = LinearRegression() + + dml_obj_ext = DoubleMLAPOS(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + np.random.seed(3141) + dml_obj_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef_normal": dml_obj.coef[0], + "coef_ext": dml_obj_ext.coef[0] + } + + return res_dict + + 
+@pytest.mark.ci +def test_doubleml_apos_ext_coef(doubleml_apos_ext_fixture): + assert math.isclose( + doubleml_apos_ext_fixture["coef_normal"], + doubleml_apos_ext_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4 + ) From 6f8d3f4c7fd7232f01d9c408277d2ebe1207687a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:36:49 +0200 Subject: [PATCH 92/98] fix sensitivity_summary tests --- doubleml/irm/tests/test_apos_exceptions.py | 2 +- doubleml/tests/test_exceptions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index 752f77240..716364cf4 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -106,7 +106,7 @@ def test_apos_exception_properties_and_methods(): dml_obj.sensitivity_plot() msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.' with pytest.raises(ValueError, match=msg): - dml_obj.sensitivity_summary + _ = dml_obj.sensitivity_summary @pytest.mark.ci diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 97b8dac41..cacd1edfa 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1111,7 +1111,7 @@ def test_doubleml_sensitivity_summary(): dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.' with pytest.raises(ValueError, match=msg): - _ = dml_irm.sensitivity_summary() + _ = dml_irm.sensitivity_summary @pytest.mark.ci From e25663c3dbf032176556338379ff7f45a34234f9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Aug 2024 08:33:02 +0200 Subject: [PATCH 93/98] add more restrictions on ext predictions for apos --- doubleml/irm/apos.py | 20 ++++++++++--------- doubleml/irm/tests/test_apos_exceptions.py | 23 ++++++++++++++-------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 1b5f7e0b7..7a91826b7 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -374,8 +374,6 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ treatment level. The values have to be dictionaries which containkeys ``'ml_g'`` and ``'ml_m'``. The predictions for ``'ml_m'`` are passed directly to the DoubleMLAPO model, whereas the predictions for ``'ml_g'`` are used to compute predictions for ``'ml_g1'`` and ``'ml_g0'``. - If the treatment levels do not cover all levels in the data, combined predictions for ``'ml_g'`` have - to be provided under the key ``'add_treatment_levels'``. Default is `None`. Returns @@ -779,20 +777,24 @@ def _check_data(self, obj_dml_data): def _check_external_predictions(self, external_predictions): expected_keys = self.treatment_levels - if len(self._add_treatment_levels) > 0: - expected_keys += ['add_treatment_levels'] if not isinstance(external_predictions, dict): raise TypeError('external_predictions must be a dictionary. ' + f'Object of type {type(external_predictions)} passed.') - if not set(external_predictions.keys()) == set(expected_keys): - raise ValueError('external_predictions must contain predictions for all treatment levels. ' + + if not set(external_predictions.keys()).issubset(set(expected_keys)): + raise ValueError('external_predictions must be a subset of all treatment levels. 
' +
                              f'Expected keys: {set(expected_keys)}. ' +
                              f'Passed keys: {set(external_predictions.keys())}.')
 
-        contains_ml_g = ['ml_g' in external_predictions[treatment_level] for treatment_level in self.treatment_levels]
-        if not all(contains_ml_g) and not all([not contains for contains in contains_ml_g]):
-            raise ValueError('The predictions for ml_g have to be provided for all treatment levels or not at all.')
+        expected_learner_keys = ['ml_g0', 'ml_g1', 'ml_m']
+        for key, value in external_predictions.items():
+            if not isinstance(value, dict):
+                raise TypeError(f'external_predictions[{key}] must be a dictionary. ' +
+                                f'Object of type {type(value)} passed.')
+            if not set(value.keys()).issubset(set(expected_learner_keys)):
+                raise ValueError(f'external_predictions[{key}] must be a subset of {set(expected_learner_keys)}. ' +
+                                 f'Passed keys: {set(value.keys())}.')
+
         return
 
     def _rename_external_predictions(self, external_predictions):
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 752f77240..0c20efe53 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -123,18 +123,24 @@ def test_apos_exception_ext_pred():
         1: "dummy"
     }
     msg = (
-        r"external_predictions must contain predictions for all treatment levels\. "
-        r"Expected keys: \{0, 'add_treatment_levels'\}\. "
+        r"external_predictions must be a subset of all treatment levels\. "
+        r"Expected keys: \{0\}\. "
         r"Passed keys: \{0, 1\}\."
     )
     with pytest.raises(ValueError, match=msg):
         dml_obj.fit(external_predictions=external_predictions)
 
     external_predictions = {
-        0: {"ml_g": "dummy"},
-        'add_treatment_levels': {"ml_m": "dummy"}
+        0: "dummy",
+    }
+    msg = r"external_predictions\[0\] must be a dictionary. Object of type <class 'str'> passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_obj.fit(external_predictions=external_predictions)
+
+    external_predictions = {
+        0: {"ml_g": "dummy"}
     }
-    msg = "The predictions for ml_g have to be provided for all treatment levels or not at all."
+    msg = r"external_predictions\[0\] must be a subset of \{.*\}. Passed keys: \{'ml_g'\}\."
     with pytest.raises(ValueError, match=msg):
         dml_obj.fit(external_predictions=external_predictions)
 
@@ -148,12 +148,13 @@ def test_apos_exception_ext_pred():
     # test with all levels
     dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1, 2, 3])
     external_predictions = {
         0: "dummy",
-        1: "dummy"
+        1: "dummy",
+        4: "dummy"
     }
     msg = (
-        r"external_predictions must contain predictions for all treatment levels\. "
+        r"external_predictions must be a subset of all treatment levels\. "
         r"Expected keys: \{0, 1, 2, 3\}\. "
-        r"Passed keys: \{0, 1\}\."
+        r"Passed keys: \{0, 1, 4\}\."
     )
     with pytest.raises(ValueError, match=msg):
         dml_obj.fit(external_predictions=external_predictions)

From 07e919ab99f99646e6a66801363bda2d0b677aff Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 1 Aug 2024 08:59:14 +0200
Subject: [PATCH 94/98] finalize external predictions (docstrings and method names)

---
 doubleml/irm/apos.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 7a91826b7..2a6b5ce1a 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -370,10 +370,9 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
             Default is ``False``.
 
        external_predictions : dict or None
-            A nested dictionary where the keys correspond the the treatment levels and contain predictions according to each
-            treatment level. The values have to be dictionaries which containkeys ``'ml_g'`` and ``'ml_m'``.
-            The predictions for ``'ml_m'`` are passed directly to the DoubleMLAPO model,
-            whereas the predictions for ``'ml_g'`` are used to compute predictions for ``'ml_g1'`` and ``'ml_g0'``.
+            A nested dictionary where the keys correspond to the treatment levels and can contain predictions according to
+            each treatment level. The values have to be dictionaries which can contain keys ``'ml_g0'``, ``'ml_g1'``
+            and ``'ml_m'``.
             Default is `None`.
 
         Returns
@@ -382,7 +382,7 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
 
         if external_predictions is not None:
             self._check_external_predictions(external_predictions)
-            ext_pred_dict = self._recompute_external_predictions(external_predictions)
+            ext_pred_dict = self._rename_external_predictions(external_predictions)
         else:
             ext_pred_dict = None
@@ -797,7 +796,7 @@ def _check_external_predictions(self, external_predictions):
         return
 
-    def _recompute_external_predictions(self, external_predictions):
+    def _rename_external_predictions(self, external_predictions):
         d_col = self._dml_data.d_cols[0]
         ext_pred_dict = {treatment_level: {d_col: {}} for treatment_level in self.treatment_levels}
         for treatment_level in self.treatment_levels:

From 7b8b330b2aa9761587a0bbaf845272e3f6773b49 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 1 Aug 2024 09:06:28 +0200
Subject: [PATCH 95/98] add evaluations for external predictions in DoubleMLAPO

---
 doubleml/irm/apo.py                              |  6 +++---
 .../irm/tests/test_apos_external_predictions.py  | 17 ++++++++++++++++-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index b0f4f3817..93c3c0df4 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -218,7 +218,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         if g0_external:
             # use external predictions
             g_hat0 = {'preds': external_predictions['ml_g0'],
-                      'targets': None,
+                      'targets': _cond_targets(y, cond_sample=(treated == 0)),
                       'models': None}
         else:
             g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
@@ -233,7 +233,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         if g1_external:
             # use external predictions
             g_hat1 = {'preds': external_predictions['ml_g1'],
-                      'targets': None,
+                      'targets': _cond_targets(y, cond_sample=(treated == 1)),
                       'models': None}
         else:
             g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
@@ -250,7 +250,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         if m_external:
             # use external predictions
             m_hat = {'preds': external_predictions['ml_m'],
-                     'targets': None,
+                     'targets': treated,
                      'models': None}
         else:
             m_hat = _dml_cv_predict(self._learner['ml_m'], x, treated, smpls=smpls, n_jobs=n_jobs_cv,
diff --git 
a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py index 1aa8d4ba1..b6a2c8eed 100644 --- a/doubleml/irm/tests/test_apos_external_predictions.py +++ b/doubleml/irm/tests/test_apos_external_predictions.py @@ -87,7 +87,10 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex res_dict = { "coef_normal": dml_obj.coef[0], - "coef_ext": dml_obj_ext.coef[0] + "coef_ext": dml_obj_ext.coef[0], + "dml_obj": dml_obj, + "dml_obj_ext": dml_obj_ext, + "treatment_levels": treatment_levels } return res_dict @@ -101,3 +104,15 @@ def test_doubleml_apos_ext_coef(doubleml_apos_ext_fixture): rel_tol=1e-9, abs_tol=1e-4 ) + + +@pytest.mark.ci +def test_doubleml_apos_ext_pred_nuisance(doubleml_apos_ext_fixture): + for i_level, _ in enumerate(doubleml_apos_ext_fixture["treatment_levels"]): + for nuisance_key in ["ml_g0", "ml_g1", "ml_m"]: + assert np.allclose( + doubleml_apos_ext_fixture["dml_obj"].modellist[i_level].nuisance_loss[nuisance_key], + doubleml_apos_ext_fixture["dml_obj_ext"].modellist[i_level].nuisance_loss[nuisance_key], + rtol=1e-9, + atol=1e-4 + ) From ae5b2c0d8037a4a08bd255ed274e5a30b775b73a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Aug 2024 12:08:03 +0200 Subject: [PATCH 96/98] fix dimensions in gain_statistics --- doubleml/utils/gain_statistics.py | 41 +++++++++++-------- .../tests/test_exceptions_gain_statistics.py | 11 +++-- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index bfd388455..2fa233b33 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -19,44 +19,49 @@ def gain_statistics(dml_long, dml_short): benchmark_dict : dict Benchmarking dictionary (dict) with values for ``cf_d``, ``cf_y``, ``rho``, and ``delta_theta``. """ - if not isinstance(dml_long.sensitivity_elements, dict): + + # set input for readability + sensitivity_elements_long = dml_long.framework.sensitivity_elements + sensitivity_elements_short = dml_short.framework.sensitivity_elements + + if not isinstance(sensitivity_elements_long, dict): raise TypeError("dml_long does not contain the necessary sensitivity elements. " - "Expected dict for dml_long.sensitivity_elements.") + "Expected dict for dml_long.framework.sensitivity_elements.") expected_keys = ['sigma2', 'nu2'] - if not all(key in dml_long.sensitivity_elements.keys() for key in expected_keys): + if not all(key in sensitivity_elements_long.keys() for key in expected_keys): raise ValueError("dml_long does not contain the necessary sensitivity elements. " "Required keys are: " + str(expected_keys)) - if not isinstance(dml_short.sensitivity_elements, dict): + if not isinstance(sensitivity_elements_short, dict): raise TypeError("dml_short does not contain the necessary sensitivity elements. " - "Expected dict for dml_short.sensitivity_elements.") - if not all(key in dml_short.sensitivity_elements.keys() for key in expected_keys): + "Expected dict for dml_short.framework.sensitivity_elements.") + if not all(key in sensitivity_elements_short.keys() for key in expected_keys): raise ValueError("dml_short does not contain the necessary sensitivity elements. 
" "Required keys are: " + str(expected_keys)) for key in expected_keys: - if not isinstance(dml_long.sensitivity_elements[key], np.ndarray): + if not isinstance(sensitivity_elements_long[key], np.ndarray): raise TypeError("dml_long does not contain the necessary sensitivity elements. " f"Expected numpy.ndarray for key {key}.") - if not isinstance(dml_short.sensitivity_elements[key], np.ndarray): + if not isinstance(sensitivity_elements_short[key], np.ndarray): raise TypeError("dml_short does not contain the necessary sensitivity elements. " f"Expected numpy.ndarray for key {key}.") - if len(dml_long.sensitivity_elements[key].shape) != 3 or dml_long.sensitivity_elements[key].shape[0] != 1: + if len(sensitivity_elements_long[key].shape) != 3 or sensitivity_elements_long[key].shape[0] != 1: raise ValueError("dml_long does not contain the necessary sensitivity elements. " f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") - if len(dml_short.sensitivity_elements[key].shape) != 3 or dml_short.sensitivity_elements[key].shape[0] != 1: + if len(sensitivity_elements_short[key].shape) != 3 or sensitivity_elements_short[key].shape[0] != 1: raise ValueError("dml_short does not contain the necessary sensitivity elements. " f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") - if not np.array_equal(dml_long.sensitivity_elements[key].shape, dml_short.sensitivity_elements[key].shape): + if not np.array_equal(sensitivity_elements_long[key].shape, sensitivity_elements_short[key].shape): raise ValueError("dml_long and dml_short do not contain the same shape of sensitivity elements. " - "Shapes of " + key + " are: " + str(dml_long.sensitivity_elements[key].shape) + - " and " + str(dml_short.sensitivity_elements[key].shape)) + "Shapes of " + key + " are: " + str(sensitivity_elements_long[key].shape) + + " and " + str(sensitivity_elements_short[key].shape)) if not isinstance(dml_long.all_coef, np.ndarray): raise TypeError("dml_long.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") if not isinstance(dml_short.all_coef, np.ndarray): raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") - expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[1], dml_long.sensitivity_elements['sigma2'].shape[2]) + expected_shape = (sensitivity_elements_long['sigma2'].shape[1], sensitivity_elements_long['sigma2'].shape[2]) if dml_long.all_coef.shape != expected_shape: raise ValueError("dml_long.all_coef does not contain the necessary coefficients. 
Expected shape: " + str(expected_shape)) @@ -66,10 +71,10 @@ def gain_statistics(dml_long, dml_short): # save elements for readability var_y = np.var(dml_long._dml_data.y) - var_y_residuals_long = np.squeeze(dml_long.sensitivity_elements['sigma2'], axis=0) - nu2_long = np.squeeze(dml_long.sensitivity_elements['nu2'], axis=0) - var_y_residuals_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0) - nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0) + var_y_residuals_long = np.squeeze(sensitivity_elements_long['sigma2'], axis=0) + nu2_long = np.squeeze(sensitivity_elements_long['nu2'], axis=0) + var_y_residuals_short = np.squeeze(sensitivity_elements_short['sigma2'], axis=0) + nu2_short = np.squeeze(sensitivity_elements_short['nu2'], axis=0) # compute nonparametric R2 R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y) diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py index c4f3d3af3..9f42063d6 100644 --- a/doubleml/utils/tests/test_exceptions_gain_statistics.py +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -4,9 +4,14 @@ from doubleml.utils.gain_statistics import gain_statistics +class test_framework(): + def __init__(self, sensitivity_elements): + self.sensitivity_elements = sensitivity_elements + + class test_dml_class(): def __init__(self, sensitivity_elements, all_coef): - self.sensitivity_elements = sensitivity_elements + self.framework = test_framework(sensitivity_elements) self.all_coef = all_coef @@ -30,11 +35,11 @@ def test_doubleml_exception_data(): sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), all_coef=np.random.normal(size=(n_rep, n_coef)) ) - msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.framework\.sensitivity_elements\." with pytest.raises(TypeError, match=msg): _ = gain_statistics(dml_incorrect, dml_correct) msg = r"dml_short does not contain the necessary sensitivity elements\. " - msg += r"Expected dict for dml_short\.sensitivity_elements\." + msg += r"Expected dict for dml_short\.framework\.sensitivity_elements\." with pytest.raises(TypeError, match=msg): _ = gain_statistics(dml_correct, dml_incorrect) From fef69878cc972b4821ea09bf020d2f274645b373 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:06:10 +0200 Subject: [PATCH 97/98] fix formatting --- doubleml/utils/tests/test_exceptions_gain_statistics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py index 9f42063d6..734185eb4 100644 --- a/doubleml/utils/tests/test_exceptions_gain_statistics.py +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -35,7 +35,8 @@ def test_doubleml_exception_data(): sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), all_coef=np.random.normal(size=(n_rep, n_coef)) ) - msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.framework\.sensitivity_elements\." + msg = r"dml_long does not contain the necessary sensitivity elements\. " + msg += r"Expected dict for dml_long\.framework\.sensitivity_elements\." 
with pytest.raises(TypeError, match=msg): _ = gain_statistics(dml_incorrect, dml_correct) msg = r"dml_short does not contain the necessary sensitivity elements\. " From 5200ce289de075049e16831d85697893f46c2681 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Aug 2024 16:07:32 +0200 Subject: [PATCH 98/98] update make_irm_data_discrete_treatments descriptions --- doubleml/datasets.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index e2f2dcddf..b17c43f5f 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1503,6 +1503,10 @@ def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, rando The number of treatment levels. Default is ``3``. + linear : bool + Indicates whether the true underlying regression is linear. + Default is ``False``. + random_state : int Random seed for reproducibility. Default is ``42``.
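
Taken together, a compact sketch of the sensitivity workflow this series adds to DoubleMLAPOS, using the same data setup as the sketch after patch 89; it assumes the final state of the series, and the learner choices are again illustrative only:

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from doubleml import DoubleMLAPOS, DoubleMLData
    from doubleml.datasets import make_irm_data_discrete_treatments

    data = make_irm_data_discrete_treatments(n_obs=500, n_levels=3, linear=False, random_state=42)
    df = pd.DataFrame(
        np.column_stack((data['y'], data['d'], data['x'])),
        columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
    )
    dml_data = DoubleMLData(df, 'y', 'd')

    dml_obj = DoubleMLAPOS(dml_data, LinearRegression(), LogisticRegression(),
                           treatment_levels=[0, 1, 2, 3])
    dml_obj.fit()
    print(dml_obj.summary)

    # sensitivity_summary is a property and requires sensitivity_analysis() first.
    dml_obj.sensitivity_analysis()
    print(dml_obj.sensitivity_summary)

    # Benchmark a single observed confounder; this refits a short model without 'x1'
    # and relies on the corrected gain_statistics from patches 85 and 96.
    print(dml_obj.sensitivity_benchmark(benchmarking_set=['x1']))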