From d86e90f5d46b560f41659f5c5ee737fa1797d865 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 22:01:37 +0200 Subject: [PATCH 1/9] rename rmse to nuisance_losses in doubleml class --- doubleml/double_ml.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 8ea773a41..b7195a674 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -54,7 +54,7 @@ def __init__(self, # initialize predictions and target to None which are only stored if method fit is called with store_predictions=True self._predictions = None self._nuisance_targets = None - self._rmses = None + self._losses = None # initialize models to None which are only stored if method fit is called with store_models=True self._models = None @@ -234,11 +234,11 @@ def nuisance_targets(self): return self._nuisance_targets @property - def rmses(self): + def nuisance_loss(self): """ - The root-mean-squared-errors of the nuisance models. + The losses of the nuisance models (root-mean-squared-errors or logloss). """ - return self._rmses + return self._nuisance_loss @property def models(self): @@ -916,7 +916,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m def _initalize_fit(self, store_predictions, store_models): # initialize rmse arrays for nuisance functions evaluation - self._initialize_rmses() + self._initialize_nuisance_loss() if store_predictions: self._initialize_predictions_and_targets() @@ -942,8 +942,8 @@ def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, externa self._set_score_elements(score_elements, self._i_rep, self._i_treat) - # calculate rmses and store predictions and targets of the nuisance models - self._calc_rmses(preds['predictions'], preds['targets']) + # calculate nuisance losses and store predictions and targets of the nuisance models + self._calc_nuisance_loss(preds['predictions'], preds['targets']) if store_predictions: self._store_predictions_and_targets(preds['predictions'], preds['targets']) if store_models: @@ -1001,9 +1001,11 @@ def _initialize_predictions_and_targets(self): self._nuisance_targets = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} - def _initialize_rmses(self): - self._rmses = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names} + def _initialize_nuisance_loss(self): + self._nuisance_loss = { + learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) + for learner in self.params_names + } def _initialize_models(self): self._models = {learner: {treat_var: [None] * self.n_rep for treat_var in self._dml_data.d_cols} @@ -1014,13 +1016,13 @@ def _store_predictions_and_targets(self, preds, targets): self._predictions[learner][:, self._i_rep, self._i_treat] = preds[learner] self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] - def _calc_rmses(self, preds, targets): + def _calc_nuisance_loss(self, preds, targets): for learner in self.params_names: if targets[learner] is None: - self._rmses[learner][self._i_rep, self._i_treat] = np.nan + self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.nan else: sq_error = np.power(targets[learner] - preds[learner], 2) - self._rmses[learner][self._i_rep, self._i_treat] = np.sqrt(np.nanmean(sq_error, axis=0)) + self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.sqrt(np.nanmean(sq_error, axis=0)) def _store_models(self, models): for learner in self.params_names: From 3a224eb207436253bb7ac6a89636bffa087bf5fa Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 22:20:55 +0200 Subject: [PATCH 2/9] fix nuisance_loss in doubleml.summary --- doubleml/double_ml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index b7195a674..ccb0e6932 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -54,7 +54,7 @@ def __init__(self, # initialize predictions and target to None which are only stored if method fit is called with store_predictions=True self._predictions = None self._nuisance_targets = None - self._losses = None + self._nuisance_loss = None # initialize models to None which are only stored if method fit is called with store_models=True self._models = None @@ -119,10 +119,10 @@ def __str__(self): learner_info = '' for key, value in self.learner.items(): learner_info += f'Learner {key}: {str(value)}\n' - if self.rmses is not None: + if self.nuisance_loss is not None: learner_info += 'Out-of-sample Performance:\n' for learner in self.params_names: - learner_info += f'Learner {learner} RMSE: {self.rmses[learner]}\n' + learner_info += f'Learner {learner} RMSE: {self.nuisance_loss[learner]}\n' if self._is_cluster_data: resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \ From 09e49f1f45ea70ac3346b1e53853bd4ccde9860b Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Wed, 17 Jul 2024 22:26:36 +0200 Subject: [PATCH 3/9] update unit test to nuisance_loss --- doubleml/tests/test_evaluate_learner.py | 2 +- doubleml/tests/test_return_types.py | 88 ++++++++++++------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py index 51af25f60..031dd4c42 100644 --- a/doubleml/tests/test_evaluate_learner.py +++ b/doubleml/tests/test_evaluate_learner.py @@ -49,7 +49,7 @@ def dml_irm_eval_learner_fixture(learner, trimming_threshold, n_rep): dml_irm_obj.fit() res_manual = dml_irm_obj.evaluate_learners() - res_dict = {'rmses': dml_irm_obj.rmses, + res_dict = {'rmses': dml_irm_obj.nuisance_loss, 'rmses_manual': res_manual } return res_dict diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 1d03439a4..45aae6d9b 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -349,50 +349,50 @@ def test_stored_nuisance_targets(): @pytest.mark.ci -def test_rmses(): - assert plr_obj.rmses['ml_l'].shape == (n_rep, n_treat) - assert plr_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert pliv_obj.rmses['ml_l'].shape == (n_rep, n_treat) - assert pliv_obj.rmses['ml_m'].shape == (n_rep, n_treat) - assert pliv_obj.rmses['ml_r'].shape == (n_rep, n_treat) - - assert irm_obj.rmses['ml_g0'].shape == (n_rep, n_treat) - assert irm_obj.rmses['ml_g1'].shape == (n_rep, n_treat) - assert irm_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert iivm_obj.rmses['ml_g0'].shape == (n_rep, n_treat) - assert iivm_obj.rmses['ml_g1'].shape == (n_rep, n_treat) - assert iivm_obj.rmses['ml_m'].shape == (n_rep, n_treat) - assert iivm_obj.rmses['ml_r0'].shape == (n_rep, n_treat) - assert iivm_obj.rmses['ml_r1'].shape == (n_rep, n_treat) - - assert cvar_obj.rmses['ml_g'].shape == (n_rep, n_treat) - assert cvar_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert pq_obj.rmses['ml_g'].shape == (n_rep, n_treat) - assert pq_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert lpq_obj.rmses['ml_g_du_z0'].shape == (n_rep, n_treat) - assert lpq_obj.rmses['ml_g_du_z1'].shape == (n_rep, n_treat) - assert lpq_obj.rmses['ml_m_z'].shape == (n_rep, n_treat) - assert lpq_obj.rmses['ml_m_d_z0'].shape == (n_rep, n_treat) - assert lpq_obj.rmses['ml_m_d_z1'].shape == (n_rep, n_treat) - - assert did_obj.rmses['ml_g0'].shape == (n_rep, n_treat) - assert did_obj.rmses['ml_g1'].shape == (n_rep, n_treat) - assert did_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert did_cs_obj.rmses['ml_g_d0_t0'].shape == (n_rep, n_treat) - assert did_cs_obj.rmses['ml_g_d0_t1'].shape == (n_rep, n_treat) - assert did_cs_obj.rmses['ml_g_d1_t0'].shape == (n_rep, n_treat) - assert did_cs_obj.rmses['ml_g_d1_t1'].shape == (n_rep, n_treat) - assert did_cs_obj.rmses['ml_m'].shape == (n_rep, n_treat) - - assert ssm_obj.rmses['ml_g_d0'].shape == (n_rep, n_treat) - assert ssm_obj.rmses['ml_g_d1'].shape == (n_rep, n_treat) - assert ssm_obj.rmses['ml_m'].shape == (n_rep, n_treat) - assert ssm_obj.rmses['ml_pi'].shape == (n_rep, n_treat) +def test_nuisance_loss(): + assert plr_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat) + assert plr_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert pliv_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat) + assert pliv_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + assert pliv_obj.nuisance_loss['ml_r'].shape == (n_rep, n_treat) + + assert irm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat) + assert irm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat) + assert irm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert iivm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat) + assert iivm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat) + assert iivm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + assert iivm_obj.nuisance_loss['ml_r0'].shape == (n_rep, n_treat) + assert iivm_obj.nuisance_loss['ml_r1'].shape == (n_rep, n_treat) + + assert cvar_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat) + assert cvar_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert pq_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat) + assert pq_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert lpq_obj.nuisance_loss['ml_g_du_z0'].shape == (n_rep, n_treat) + assert lpq_obj.nuisance_loss['ml_g_du_z1'].shape == (n_rep, n_treat) + assert lpq_obj.nuisance_loss['ml_m_z'].shape == (n_rep, n_treat) + assert lpq_obj.nuisance_loss['ml_m_d_z0'].shape == (n_rep, n_treat) + assert lpq_obj.nuisance_loss['ml_m_d_z1'].shape == (n_rep, n_treat) + + assert did_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat) + assert did_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat) + assert did_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert did_cs_obj.nuisance_loss['ml_g_d0_t0'].shape == (n_rep, n_treat) + assert did_cs_obj.nuisance_loss['ml_g_d0_t1'].shape == (n_rep, n_treat) + assert did_cs_obj.nuisance_loss['ml_g_d1_t0'].shape == (n_rep, n_treat) + assert did_cs_obj.nuisance_loss['ml_g_d1_t1'].shape == (n_rep, n_treat) + assert did_cs_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + + assert ssm_obj.nuisance_loss['ml_g_d0'].shape == (n_rep, n_treat) + assert ssm_obj.nuisance_loss['ml_g_d1'].shape == (n_rep, n_treat) + assert ssm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat) + assert ssm_obj.nuisance_loss['ml_pi'].shape == (n_rep, n_treat) @pytest.mark.ci From 53bbca9f5915407d01bf5685d16e7ce8c737051f Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 18 Jul 2024 00:01:03 +0200 Subject: [PATCH 4/9] Add logloss to _cal_nuisance_loss for classifier as learner --- doubleml/double_ml.py | 18 +++++++++++++++-- doubleml/tests/test_evaluate_learner.py | 27 ++++++++++++++----------- doubleml/utils/_estimation.py | 8 +++++++- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index ccb0e6932..fe8cc54d0 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1018,11 +1018,25 @@ def _store_predictions_and_targets(self, preds, targets): def _calc_nuisance_loss(self, preds, targets): for learner in self.params_names: + if targets[learner] is None: self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.nan else: - sq_error = np.power(targets[learner] - preds[learner], 2) - self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.sqrt(np.nanmean(sq_error, axis=0)) + learner_key = [key for key in self._learner.keys() if key in learner][0] + is_classifier = self._check_learner( + self._learner[learner_key], + learner, + regressor=True, classifier=True + ) + if is_classifier: + predictions = np.clip(preds[learner], 1e-15, 1 - 1e-15) + logloss = targets[learner] * np.log(predictions) + (1 - targets[learner]) * np.log(1 - predictions) + loss = -np.nanmean(logloss, axis=0) + else: + sq_error = np.power(targets[learner] - preds[learner], 2) + loss = np.sqrt(np.nanmean(sq_error, axis=0)) + + self._nuisance_loss[learner][self._i_rep, self._i_treat] = loss def _store_models(self, models): for learner in self.params_names: diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py index 031dd4c42..4b3056b89 100644 --- a/doubleml/tests/test_evaluate_learner.py +++ b/doubleml/tests/test_evaluate_learner.py @@ -7,6 +7,8 @@ from sklearn.linear_model import LogisticRegression, LinearRegression from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from doubleml.utils._estimation import _logloss + np.random.seed(3141) data = make_irm_data(theta=0.5, n_obs=200, dim_x=5, return_type='DataFrame') @@ -47,26 +49,27 @@ def dml_irm_eval_learner_fixture(learner, trimming_threshold, n_rep): n_rep=n_rep, trimming_threshold=trimming_threshold) dml_irm_obj.fit() - res_manual = dml_irm_obj.evaluate_learners() + res_manual = dml_irm_obj.evaluate_learners(learners=['ml_g0', 'ml_g1']) + res_manual['ml_m'] = dml_irm_obj.evaluate_learners(learners=['ml_m'], metric=_logloss)['ml_m'] - res_dict = {'rmses': dml_irm_obj.nuisance_loss, - 'rmses_manual': res_manual + res_dict = {'nuisance_loss': dml_irm_obj.nuisance_loss, + 'nuisance_loss_manual': res_manual } return res_dict @pytest.mark.ci def test_dml_irm_eval_learner(dml_irm_eval_learner_fixture, n_rep): - assert dml_irm_eval_learner_fixture['rmses_manual']['ml_g0'].shape == (n_rep, 1) - assert dml_irm_eval_learner_fixture['rmses_manual']['ml_g1'].shape == (n_rep, 1) - assert dml_irm_eval_learner_fixture['rmses_manual']['ml_m'].shape == (n_rep, 1) + assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'].shape == (n_rep, 1) + assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'].shape == (n_rep, 1) + assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'].shape == (n_rep, 1) - assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_g0'], - dml_irm_eval_learner_fixture['rmses']['ml_g0'], + assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'], + dml_irm_eval_learner_fixture['nuisance_loss']['ml_g0'], rtol=1e-9, atol=1e-4) - assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_g1'], - dml_irm_eval_learner_fixture['rmses']['ml_g1'], + assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'], + dml_irm_eval_learner_fixture['nuisance_loss']['ml_g1'], rtol=1e-9, atol=1e-4) - assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_m'], - dml_irm_eval_learner_fixture['rmses']['ml_m'], + assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'], + dml_irm_eval_learner_fixture['nuisance_loss']['ml_m'], rtol=1e-9, atol=1e-4) diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py index 7892caf1d..42de8c639 100644 --- a/doubleml/utils/_estimation.py +++ b/doubleml/utils/_estimation.py @@ -6,7 +6,7 @@ from sklearn.base import clone from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV -from sklearn.metrics import root_mean_squared_error +from sklearn.metrics import root_mean_squared_error, log_loss from statsmodels.nonparametric.kde import KDEUnivariate @@ -204,6 +204,12 @@ def _rmse(y_true, y_pred): return rmse +def _logloss(y_true, y_pred): + subset = np.logical_not(np.isnan(y_true)) + logloss = log_loss(y_true[subset], y_pred[subset]) + return logloss + + def _predict_zero_one_propensity(learner, X): pred_proba = learner.predict_proba(X) if pred_proba.shape[1] == 2: From 18861472284ae8bc9210b300d173d685d3a91b94 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 18 Jul 2024 00:11:49 +0200 Subject: [PATCH 5/9] update summary --- doubleml/double_ml.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index fe8cc54d0..39dda838b 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -122,7 +122,7 @@ def __str__(self): if self.nuisance_loss is not None: learner_info += 'Out-of-sample Performance:\n' for learner in self.params_names: - learner_info += f'Learner {learner} RMSE: {self.nuisance_loss[learner]}\n' + learner_info += f'Learner {learner} Loss: {self.nuisance_loss[learner]}\n' if self._is_cluster_data: resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \ @@ -915,7 +915,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") def _initalize_fit(self, store_predictions, store_models): - # initialize rmse arrays for nuisance functions evaluation + # initialize loss arrays for nuisance functions evaluation self._initialize_nuisance_loss() if store_predictions: @@ -1022,9 +1022,10 @@ def _calc_nuisance_loss(self, preds, targets): if targets[learner] is None: self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.nan else: - learner_key = [key for key in self._learner.keys() if key in learner][0] + learner_keys = [key for key in self._learner.keys() if key in learner] + assert len(learner_keys) == 1 is_classifier = self._check_learner( - self._learner[learner_key], + self._learner[learner_keys[0]], learner, regressor=True, classifier=True ) From fbaca50fc2dcb706087c10277ea20364b706bed0 Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 18 Jul 2024 00:22:46 +0200 Subject: [PATCH 6/9] add dict _is_classifier for learners --- doubleml/double_ml.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 39dda838b..6e4965b2f 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -50,6 +50,7 @@ def __init__(self, # initialize learners and parameters which are set model specific self._learner = None self._params = None + self._is_classfier = None # initialize predictions and target to None which are only stored if method fit is called with store_predictions=True self._predictions = None @@ -1017,19 +1018,24 @@ def _store_predictions_and_targets(self, preds, targets): self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] def _calc_nuisance_loss(self, preds, targets): + self._is_classfier = {key: False for key in self.params_names} for learner in self.params_names: + # check if the learner is a classifier + learner_keys = [key for key in self._learner.keys() if key in learner] + assert len(learner_keys) == 1 + self._is_classfier[learner] = self._check_learner( + self._learner[learner_keys[0]], + learner, + regressor=True, classifier=True + ) if targets[learner] is None: self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.nan else: learner_keys = [key for key in self._learner.keys() if key in learner] assert len(learner_keys) == 1 - is_classifier = self._check_learner( - self._learner[learner_keys[0]], - learner, - regressor=True, classifier=True - ) - if is_classifier: + + if self._is_classfier[learner]: predictions = np.clip(preds[learner], 1e-15, 1 - 1e-15) logloss = targets[learner] * np.log(predictions) + (1 - targets[learner]) * np.log(1 - predictions) loss = -np.nanmean(logloss, axis=0) From 0cdb29f988c029aa34fd137489763f6338c058ed Mon Sep 17 00:00:00 2001 From: Sven1704 Date: Thu, 18 Jul 2024 00:52:56 +0200 Subject: [PATCH 7/9] update summary to include classification and regression for learner evaluation --- doubleml/double_ml.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6e4965b2f..5fe6decf2 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -50,7 +50,7 @@ def __init__(self, # initialize learners and parameters which are set model specific self._learner = None self._params = None - self._is_classfier = None + self._is_classifier = {} # initialize predictions and target to None which are only stored if method fit is called with store_predictions=True self._predictions = None @@ -122,8 +122,16 @@ def __str__(self): learner_info += f'Learner {key}: {str(value)}\n' if self.nuisance_loss is not None: learner_info += 'Out-of-sample Performance:\n' - for learner in self.params_names: - learner_info += f'Learner {learner} Loss: {self.nuisance_loss[learner]}\n' + is_classifier = [value for value in self._is_classifier.values()] + is_regressor = [not value for value in is_classifier] + if any(is_regressor): + learner_info += 'Regression:\n' + for learner in [key for key, value in self._is_classifier.items() if value is False]: + learner_info += f'Learner {learner} RMSE: {self.nuisance_loss[learner]}\n' + if any(is_classifier): + learner_info += 'Classification:\n' + for learner in [key for key, value in self._is_classifier.items() if value is True]: + learner_info += f'Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n' if self._is_cluster_data: resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \ @@ -1018,12 +1026,12 @@ def _store_predictions_and_targets(self, preds, targets): self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] def _calc_nuisance_loss(self, preds, targets): - self._is_classfier = {key: False for key in self.params_names} + self._is_classifier = {key: False for key in self.params_names} for learner in self.params_names: # check if the learner is a classifier learner_keys = [key for key in self._learner.keys() if key in learner] assert len(learner_keys) == 1 - self._is_classfier[learner] = self._check_learner( + self._is_classifier[learner] = self._check_learner( self._learner[learner_keys[0]], learner, regressor=True, classifier=True @@ -1035,7 +1043,7 @@ def _calc_nuisance_loss(self, preds, targets): learner_keys = [key for key in self._learner.keys() if key in learner] assert len(learner_keys) == 1 - if self._is_classfier[learner]: + if self._is_classifier[learner]: predictions = np.clip(preds[learner], 1e-15, 1 - 1e-15) logloss = targets[learner] * np.log(predictions) + (1 - targets[learner]) * np.log(1 - predictions) loss = -np.nanmean(logloss, axis=0) From 36cf51bb26fe572e899271bae5dfbe34571d4c5a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 19 Jul 2024 07:18:12 +0200 Subject: [PATCH 8/9] remove ml_m from did experimental --- doubleml/did/did.py | 6 +++++- doubleml/did/tests/_utils_did_manual.py | 2 +- doubleml/did/tests/test_did_external_predictions.py | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 1fd85f46d..733b76c97 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -161,7 +161,11 @@ def trimming_threshold(self): return self._trimming_threshold def _initialize_ml_nuisance_params(self): - valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] + if self.score == 'observational': + valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] + else: + assert self.score == 'experimental' + valid_learner = ['ml_g0', 'ml_g1'] self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index 5956d6d95..0fc98a6a2 100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -220,7 +220,6 @@ def fit_sensitivity_elements_did(y, d, all_coef, predictions, score, in_sample_n for i_rep in range(n_rep): - m_hat = predictions['ml_m'][:, i_rep, 0] g_hat0 = predictions['ml_g0'][:, i_rep, 0] g_hat1 = predictions['ml_g1'][:, i_rep, 0] @@ -229,6 +228,7 @@ def fit_sensitivity_elements_did(y, d, all_coef, predictions, score, in_sample_n psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0] if score == 'observational': + m_hat = predictions['ml_m'][:, i_rep, 0] propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat) if in_sample_normalization: m_alpha_1 = np.divide(d, np.mean(d)) diff --git a/doubleml/did/tests/test_did_external_predictions.py b/doubleml/did/tests/test_did_external_predictions.py index e12c8c21b..676302e96 100644 --- a/doubleml/did/tests/test_did_external_predictions.py +++ b/doubleml/did/tests/test_did_external_predictions.py @@ -36,7 +36,8 @@ def doubleml_did_fixture(did_score, n_rep): ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0] ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] dml_did_ext = DoubleMLDID(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_ext.set_sample_splitting(all_smpls) From 870c12cae1fa332c60d6cd93aaed6946b7aa8c24 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 19 Jul 2024 07:21:36 +0200 Subject: [PATCH 9/9] remove ml_m from did_cs experimental --- doubleml/did/did_cs.py | 9 +++++++-- doubleml/did/tests/_utils_did_cs_manual.py | 2 +- doubleml/did/tests/test_did_cs_external_predictions.py | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 4b84fb1a4..766f0ea3a 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -162,8 +162,13 @@ def trimming_threshold(self): return self._trimming_threshold def _initialize_ml_nuisance_params(self): - valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1', - 'ml_g_d1_t0', 'ml_g_d1_t1', 'ml_m'] + if self.score == 'observational': + valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1', + 'ml_g_d1_t0', 'ml_g_d1_t1', 'ml_m'] + else: + assert self.score == 'experimental' + valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1', + 'ml_g_d1_t0', 'ml_g_d1_t1'] self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index 0ace40b04..f7d975e35 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -285,7 +285,6 @@ def fit_sensitivity_elements_did_cs(y, d, t, all_coef, predictions, score, in_sa for i_rep in range(n_rep): - m_hat = predictions['ml_m'][:, i_rep, 0] g_hat_d0_t0 = predictions['ml_g_d0_t0'][:, i_rep, 0] g_hat_d0_t1 = predictions['ml_g_d0_t1'][:, i_rep, 0] g_hat_d1_t0 = predictions['ml_g_d1_t0'][:, i_rep, 0] @@ -305,6 +304,7 @@ def fit_sensitivity_elements_did_cs(y, d, t, all_coef, predictions, score, in_sa p_hat = np.mean(d) lambda_hat = np.mean(t) if score == 'observational': + m_hat = predictions['ml_m'][:, i_rep, 0] propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat) if in_sample_normalization: weight_d0t1 = np.multiply(d0t1, propensity_weight_d0) diff --git a/doubleml/did/tests/test_did_cs_external_predictions.py b/doubleml/did/tests/test_did_cs_external_predictions.py index da95428a4..732aaa54d 100644 --- a/doubleml/did/tests/test_did_cs_external_predictions.py +++ b/doubleml/did/tests/test_did_cs_external_predictions.py @@ -39,7 +39,8 @@ def doubleml_didcs_fixture(did_score, n_rep): ext_predictions["d"]["ml_g_d0_t1"] = dml_did_cs.predictions["ml_g_d0_t1"][:, :, 0] ext_predictions["d"]["ml_g_d1_t0"] = dml_did_cs.predictions["ml_g_d1_t0"][:, :, 0] ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] - ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] dml_did_cs_ext = DoubleMLDIDCS(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_cs_ext.set_sample_splitting(all_smpls)