diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 1fd85f46d..733b76c97 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -161,7 +161,11 @@ def trimming_threshold(self):
         return self._trimming_threshold
 
     def _initialize_ml_nuisance_params(self):
-        valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
+        if self.score == 'observational':
+            valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
+        else:
+            assert self.score == 'experimental'
+            valid_learner = ['ml_g0', 'ml_g1']
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
                         for learner in valid_learner}
 
diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 4b84fb1a4..766f0ea3a 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -162,8 +162,13 @@ def trimming_threshold(self):
         return self._trimming_threshold
 
     def _initialize_ml_nuisance_params(self):
-        valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1',
-                         'ml_g_d1_t0', 'ml_g_d1_t1', 'ml_m']
+        if self.score == 'observational':
+            valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1',
+                             'ml_g_d1_t0', 'ml_g_d1_t1', 'ml_m']
+        else:
+            assert self.score == 'experimental'
+            valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1',
+                             'ml_g_d1_t0', 'ml_g_d1_t1']
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
                         for learner in valid_learner}
 
diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py
index 0ace40b04..f7d975e35 100644
--- a/doubleml/did/tests/_utils_did_cs_manual.py
+++ b/doubleml/did/tests/_utils_did_cs_manual.py
@@ -285,7 +285,6 @@ def fit_sensitivity_elements_did_cs(y, d, t, all_coef, predictions, score, in_sa
 
     for i_rep in range(n_rep):
-        m_hat = predictions['ml_m'][:, i_rep, 0]
         g_hat_d0_t0 = predictions['ml_g_d0_t0'][:, i_rep, 0]
         g_hat_d0_t1 = predictions['ml_g_d0_t1'][:, i_rep, 0]
         g_hat_d1_t0 = predictions['ml_g_d1_t0'][:, i_rep, 0]
@@ -305,6 +304,7 @@
         p_hat = np.mean(d)
         lambda_hat = np.mean(t)
         if score == 'observational':
+            m_hat = predictions['ml_m'][:, i_rep, 0]
             propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
             if in_sample_normalization:
                 weight_d0t1 = np.multiply(d0t1, propensity_weight_d0)
diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py
index 5956d6d95..0fc98a6a2 100644
--- a/doubleml/did/tests/_utils_did_manual.py
+++ b/doubleml/did/tests/_utils_did_manual.py
@@ -220,7 +220,6 @@ def fit_sensitivity_elements_did(y, d, all_coef, predictions, score, in_sample_n
 
     for i_rep in range(n_rep):
-        m_hat = predictions['ml_m'][:, i_rep, 0]
         g_hat0 = predictions['ml_g0'][:, i_rep, 0]
         g_hat1 = predictions['ml_g1'][:, i_rep, 0]
 
@@ -229,6 +228,7 @@
         psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
 
         if score == 'observational':
+            m_hat = predictions['ml_m'][:, i_rep, 0]
             propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
             if in_sample_normalization:
                 m_alpha_1 = np.divide(d, np.mean(d))
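For context on the hunks above: under the 'experimental' score the treatment is taken as randomized, so no propensity model `ml_m` is fitted and the tunable nuisance set shrinks to the outcome regressions; the sensitivity helpers accordingly only read `ml_m` predictions inside the 'observational' branch. A minimal standalone sketch of the branching (the helper `valid_learners_for` is hypothetical, written only to illustrate the logic of `_initialize_ml_nuisance_params` in `DoubleMLDID`):

```python
# Hypothetical helper mirroring the branching in _initialize_ml_nuisance_params:
# the 'experimental' score drops the propensity learner 'ml_m'.
def valid_learners_for(score):
    if score == 'observational':
        return ['ml_g0', 'ml_g1', 'ml_m']
    assert score == 'experimental'
    return ['ml_g0', 'ml_g1']


print(valid_learners_for('observational'))  # ['ml_g0', 'ml_g1', 'ml_m']
print(valid_learners_for('experimental'))   # ['ml_g0', 'ml_g1']
```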
dml_did_cs.predictions["ml_g_d1_t0"][:, :, 0] ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] - ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] dml_did_cs_ext = DoubleMLDIDCS(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_cs_ext.set_sample_splitting(all_smpls) diff --git a/doubleml/did/tests/test_did_external_predictions.py b/doubleml/did/tests/test_did_external_predictions.py index e12c8c21b..676302e96 100644 --- a/doubleml/did/tests/test_did_external_predictions.py +++ b/doubleml/did/tests/test_did_external_predictions.py @@ -36,7 +36,8 @@ def doubleml_did_fixture(did_score, n_rep): ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0] ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] dml_did_ext = DoubleMLDID(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_ext.set_sample_splitting(all_smpls) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 8ea773a41..5fe6decf2 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -50,11 +50,12 @@ def __init__(self, # initialize learners and parameters which are set model specific self._learner = None self._params = None + self._is_classifier = {} # initialize predictions and target to None which are only stored if method fit is called with store_predictions=True self._predictions = None self._nuisance_targets = None - self._rmses = None + self._nuisance_loss = None # initialize models to None which are only stored if method fit is called with store_models=True self._models = None @@ -119,10 +120,18 @@ def __str__(self): learner_info = '' for key, value in self.learner.items(): learner_info += f'Learner {key}: {str(value)}\n' - if self.rmses is not None: + if self.nuisance_loss is not None: learner_info += 'Out-of-sample Performance:\n' - for learner in self.params_names: - learner_info += f'Learner {learner} RMSE: {self.rmses[learner]}\n' + is_classifier = [value for value in self._is_classifier.values()] + is_regressor = [not value for value in is_classifier] + if any(is_regressor): + learner_info += 'Regression:\n' + for learner in [key for key, value in self._is_classifier.items() if value is False]: + learner_info += f'Learner {learner} RMSE: {self.nuisance_loss[learner]}\n' + if any(is_classifier): + learner_info += 'Classification:\n' + for learner in [key for key, value in self._is_classifier.items() if value is True]: + learner_info += f'Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n' if self._is_cluster_data: resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \ @@ -234,11 +243,11 @@ def nuisance_targets(self): return self._nuisance_targets @property - def rmses(self): + def nuisance_loss(self): """ - The root-mean-squared-errors of the nuisance models. + The losses of the nuisance models (root-mean-squared-errors or logloss). 
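The two test changes above encode the calling convention for external predictions: with `score='experimental'` there is no `ml_m` nuisance, so the `ml_m` key must simply be omitted from the supplied dictionary. A hedged sketch of that pattern (a fitted `dml_did` and the parametrized `did_score` are assumed from the fixture above):

```python
# Only pass propensity predictions along when the score actually uses
# a propensity model; 'experimental' has no 'ml_m' nuisance slot.
ext_predictions = {"d": {}}
ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0]
ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0]
if did_score == "observational":
    ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0]
```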
""" - return self._rmses + return self._nuisance_loss @property def models(self): @@ -915,8 +924,8 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") def _initalize_fit(self, store_predictions, store_models): - # initialize rmse arrays for nuisance functions evaluation - self._initialize_rmses() + # initialize loss arrays for nuisance functions evaluation + self._initialize_nuisance_loss() if store_predictions: self._initialize_predictions_and_targets() @@ -942,8 +951,8 @@ def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, externa self._set_score_elements(score_elements, self._i_rep, self._i_treat) - # calculate rmses and store predictions and targets of the nuisance models - self._calc_rmses(preds['predictions'], preds['targets']) + # calculate nuisance losses and store predictions and targets of the nuisance models + self._calc_nuisance_loss(preds['predictions'], preds['targets']) if store_predictions: self._store_predictions_and_targets(preds['predictions'], preds['targets']) if store_models: @@ -1001,9 +1010,11 @@ def _initialize_predictions_and_targets(self): self._nuisance_targets = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} - def _initialize_rmses(self): - self._rmses = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names} + def _initialize_nuisance_loss(self): + self._nuisance_loss = { + learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) + for learner in self.params_names + } def _initialize_models(self): self._models = {learner: {treat_var: [None] * self.n_rep for treat_var in self._dml_data.d_cols} @@ -1014,13 +1025,33 @@ def _store_predictions_and_targets(self, preds, targets): self._predictions[learner][:, self._i_rep, self._i_treat] = preds[learner] self._nuisance_targets[learner][:, self._i_rep, self._i_treat] = targets[learner] - def _calc_rmses(self, preds, targets): + def _calc_nuisance_loss(self, preds, targets): + self._is_classifier = {key: False for key in self.params_names} for learner in self.params_names: + # check if the learner is a classifier + learner_keys = [key for key in self._learner.keys() if key in learner] + assert len(learner_keys) == 1 + self._is_classifier[learner] = self._check_learner( + self._learner[learner_keys[0]], + learner, + regressor=True, classifier=True + ) + if targets[learner] is None: - self._rmses[learner][self._i_rep, self._i_treat] = np.nan + self._nuisance_loss[learner][self._i_rep, self._i_treat] = np.nan else: - sq_error = np.power(targets[learner] - preds[learner], 2) - self._rmses[learner][self._i_rep, self._i_treat] = np.sqrt(np.nanmean(sq_error, axis=0)) + learner_keys = [key for key in self._learner.keys() if key in learner] + assert len(learner_keys) == 1 + + if self._is_classifier[learner]: + predictions = np.clip(preds[learner], 1e-15, 1 - 1e-15) + logloss = targets[learner] * np.log(predictions) + (1 - targets[learner]) * np.log(1 - predictions) + loss = -np.nanmean(logloss, axis=0) + else: + sq_error = np.power(targets[learner] - preds[learner], 2) + loss = np.sqrt(np.nanmean(sq_error, axis=0)) + + self._nuisance_loss[learner][self._i_rep, self._i_treat] = loss def _store_models(self, models): for learner in self.params_names: diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py index 
diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py
index 51af25f60..4b3056b89 100644
--- a/doubleml/tests/test_evaluate_learner.py
+++ b/doubleml/tests/test_evaluate_learner.py
@@ -7,6 +7,8 @@
 from sklearn.linear_model import LogisticRegression, LinearRegression
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
+from doubleml.utils._estimation import _logloss
+
 np.random.seed(3141)
 data = make_irm_data(theta=0.5, n_obs=200, dim_x=5, return_type='DataFrame')
@@ -47,26 +49,27 @@ def dml_irm_eval_learner_fixture(learner, trimming_threshold, n_rep):
                              n_rep=n_rep,
                              trimming_threshold=trimming_threshold)
     dml_irm_obj.fit()
-    res_manual = dml_irm_obj.evaluate_learners()
+    res_manual = dml_irm_obj.evaluate_learners(learners=['ml_g0', 'ml_g1'])
+    res_manual['ml_m'] = dml_irm_obj.evaluate_learners(learners=['ml_m'], metric=_logloss)['ml_m']
 
-    res_dict = {'rmses': dml_irm_obj.rmses,
-                'rmses_manual': res_manual
+    res_dict = {'nuisance_loss': dml_irm_obj.nuisance_loss,
+                'nuisance_loss_manual': res_manual
                 }
 
     return res_dict
 
 
 @pytest.mark.ci
 def test_dml_irm_eval_learner(dml_irm_eval_learner_fixture, n_rep):
-    assert dml_irm_eval_learner_fixture['rmses_manual']['ml_g0'].shape == (n_rep, 1)
-    assert dml_irm_eval_learner_fixture['rmses_manual']['ml_g1'].shape == (n_rep, 1)
-    assert dml_irm_eval_learner_fixture['rmses_manual']['ml_m'].shape == (n_rep, 1)
+    assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'].shape == (n_rep, 1)
+    assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'].shape == (n_rep, 1)
+    assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'].shape == (n_rep, 1)
 
-    assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_g0'],
-                       dml_irm_eval_learner_fixture['rmses']['ml_g0'],
+    assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'],
+                       dml_irm_eval_learner_fixture['nuisance_loss']['ml_g0'],
                        rtol=1e-9, atol=1e-4)
-    assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_g1'],
-                       dml_irm_eval_learner_fixture['rmses']['ml_g1'],
+    assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'],
+                       dml_irm_eval_learner_fixture['nuisance_loss']['ml_g1'],
                        rtol=1e-9, atol=1e-4)
-    assert np.allclose(dml_irm_eval_learner_fixture['rmses_manual']['ml_m'],
-                       dml_irm_eval_learner_fixture['rmses']['ml_m'],
+    assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'],
+                       dml_irm_eval_learner_fixture['nuisance_loss']['ml_m'],
                        rtol=1e-9, atol=1e-4)
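The updated test also shows the public API at work: `evaluate_learners` accepts a `metric` callable with signature `metric(y_true, y_pred)`, so classification nuisances can be scored with `_logloss` while regression nuisances keep the default RMSE. A hedged usage sketch, assuming a fitted `dml_irm_obj` as constructed in the fixture above:

```python
# Assumes the fitted DoubleMLIRM object `dml_irm_obj` from the fixture above.
from doubleml.utils._estimation import _logloss

rmse_per_learner = dml_irm_obj.evaluate_learners(learners=['ml_g0', 'ml_g1'])
logloss_m = dml_irm_obj.evaluate_learners(learners=['ml_m'], metric=_logloss)

print(rmse_per_learner['ml_g0'].shape)  # (n_rep, n_coefs)
print(logloss_m['ml_m'])                # log loss per repetition and treatment
```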
diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py
index 1d03439a4..45aae6d9b 100644
--- a/doubleml/tests/test_return_types.py
+++ b/doubleml/tests/test_return_types.py
@@ -349,50 +349,50 @@ def test_stored_nuisance_targets():
 
 
 @pytest.mark.ci
-def test_rmses():
-    assert plr_obj.rmses['ml_l'].shape == (n_rep, n_treat)
-    assert plr_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert pliv_obj.rmses['ml_l'].shape == (n_rep, n_treat)
-    assert pliv_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-    assert pliv_obj.rmses['ml_r'].shape == (n_rep, n_treat)
-
-    assert irm_obj.rmses['ml_g0'].shape == (n_rep, n_treat)
-    assert irm_obj.rmses['ml_g1'].shape == (n_rep, n_treat)
-    assert irm_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert iivm_obj.rmses['ml_g0'].shape == (n_rep, n_treat)
-    assert iivm_obj.rmses['ml_g1'].shape == (n_rep, n_treat)
-    assert iivm_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-    assert iivm_obj.rmses['ml_r0'].shape == (n_rep, n_treat)
-    assert iivm_obj.rmses['ml_r1'].shape == (n_rep, n_treat)
-
-    assert cvar_obj.rmses['ml_g'].shape == (n_rep, n_treat)
-    assert cvar_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert pq_obj.rmses['ml_g'].shape == (n_rep, n_treat)
-    assert pq_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert lpq_obj.rmses['ml_g_du_z0'].shape == (n_rep, n_treat)
-    assert lpq_obj.rmses['ml_g_du_z1'].shape == (n_rep, n_treat)
-    assert lpq_obj.rmses['ml_m_z'].shape == (n_rep, n_treat)
-    assert lpq_obj.rmses['ml_m_d_z0'].shape == (n_rep, n_treat)
-    assert lpq_obj.rmses['ml_m_d_z1'].shape == (n_rep, n_treat)
-
-    assert did_obj.rmses['ml_g0'].shape == (n_rep, n_treat)
-    assert did_obj.rmses['ml_g1'].shape == (n_rep, n_treat)
-    assert did_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert did_cs_obj.rmses['ml_g_d0_t0'].shape == (n_rep, n_treat)
-    assert did_cs_obj.rmses['ml_g_d0_t1'].shape == (n_rep, n_treat)
-    assert did_cs_obj.rmses['ml_g_d1_t0'].shape == (n_rep, n_treat)
-    assert did_cs_obj.rmses['ml_g_d1_t1'].shape == (n_rep, n_treat)
-    assert did_cs_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-
-    assert ssm_obj.rmses['ml_g_d0'].shape == (n_rep, n_treat)
-    assert ssm_obj.rmses['ml_g_d1'].shape == (n_rep, n_treat)
-    assert ssm_obj.rmses['ml_m'].shape == (n_rep, n_treat)
-    assert ssm_obj.rmses['ml_pi'].shape == (n_rep, n_treat)
+def test_nuisance_loss():
+    assert plr_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat)
+    assert plr_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert pliv_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat)
+    assert pliv_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+    assert pliv_obj.nuisance_loss['ml_r'].shape == (n_rep, n_treat)
+
+    assert irm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
+    assert irm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
+    assert irm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert iivm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
+    assert iivm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
+    assert iivm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+    assert iivm_obj.nuisance_loss['ml_r0'].shape == (n_rep, n_treat)
+    assert iivm_obj.nuisance_loss['ml_r1'].shape == (n_rep, n_treat)
+
+    assert cvar_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat)
+    assert cvar_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert pq_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat)
+    assert pq_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert lpq_obj.nuisance_loss['ml_g_du_z0'].shape == (n_rep, n_treat)
+    assert lpq_obj.nuisance_loss['ml_g_du_z1'].shape == (n_rep, n_treat)
+    assert lpq_obj.nuisance_loss['ml_m_z'].shape == (n_rep, n_treat)
+    assert lpq_obj.nuisance_loss['ml_m_d_z0'].shape == (n_rep, n_treat)
+    assert lpq_obj.nuisance_loss['ml_m_d_z1'].shape == (n_rep, n_treat)
+
+    assert did_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
+    assert did_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
+    assert did_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert did_cs_obj.nuisance_loss['ml_g_d0_t0'].shape == (n_rep, n_treat)
+    assert did_cs_obj.nuisance_loss['ml_g_d0_t1'].shape == (n_rep, n_treat)
+    assert did_cs_obj.nuisance_loss['ml_g_d1_t0'].shape == (n_rep, n_treat)
+    assert did_cs_obj.nuisance_loss['ml_g_d1_t1'].shape == (n_rep, n_treat)
+    assert did_cs_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+
+    assert ssm_obj.nuisance_loss['ml_g_d0'].shape == (n_rep, n_treat)
+    assert ssm_obj.nuisance_loss['ml_g_d1'].shape == (n_rep, n_treat)
+    assert ssm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+    assert ssm_obj.nuisance_loss['ml_pi'].shape == (n_rep, n_treat)
 
 
 @pytest.mark.ci
diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py
index 7892caf1d..42de8c639 100644
--- a/doubleml/utils/_estimation.py
+++ b/doubleml/utils/_estimation.py
@@ -6,7 +6,7 @@
 from sklearn.base import clone
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV
-from sklearn.metrics import root_mean_squared_error
+from sklearn.metrics import root_mean_squared_error, log_loss
 from statsmodels.nonparametric.kde import KDEUnivariate
 
 
@@ -204,6 +204,12 @@ def _rmse(y_true, y_pred):
     return rmse
 
 
+def _logloss(y_true, y_pred):
+    subset = np.logical_not(np.isnan(y_true))
+    logloss = log_loss(y_true[subset], y_pred[subset])
+    return logloss
+
+
 def _predict_zero_one_propensity(learner, X):
     pred_proba = learner.predict_proba(X)
     if pred_proba.shape[1] == 2:
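The new `_logloss` helper masks missing targets before delegating to scikit-learn's `log_loss`. A quick standalone check of that behavior (the sample arrays are illustrative):

```python
import numpy as np
from sklearn.metrics import log_loss


def _logloss(y_true, y_pred):
    # drop NaN targets, then delegate to sklearn's log_loss
    subset = np.logical_not(np.isnan(y_true))
    return log_loss(y_true[subset], y_pred[subset])


y = np.array([0.0, 1.0, np.nan, 1.0])
p = np.array([0.2, 0.9, 0.5, 0.7])
print(_logloss(y, p))  # computed on the three non-NaN pairs
```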