diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py
index 1554e4bc..57d486a3 100644
--- a/doubleml/data/did_data.py
+++ b/doubleml/data/did_data.py
@@ -63,7 +63,7 @@ class DoubleMLDIDData(DoubleMLData):
     >>> from doubleml.did.datasets import make_did_SZ2020
     >>> # initialization from pandas.DataFrame
     >>> df = make_did_SZ2020(return_type='DataFrame')
-    >>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd', 't')
+    >>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd')
     >>> # initialization from np.ndarray
     >>> (x, y, d, t) = make_did_SZ2020(return_type='array')
     >>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t)
diff --git a/doubleml/data/rdd_data.py b/doubleml/data/rdd_data.py
index 6bf4a830..203c3294 100644
--- a/doubleml/data/rdd_data.py
+++ b/doubleml/data/rdd_data.py
@@ -61,14 +61,17 @@ class DoubleMLRDDData(DoubleMLData):

     Examples
     --------
+    >>> import numpy as np
+    >>> import pandas as pd
     >>> from doubleml import DoubleMLRDDData
-    >>> from doubleml.rdd.datasets import make_rdd_data
+    >>> from doubleml.rdd.datasets import make_simple_rdd_data
     >>> # initialization from pandas.DataFrame
-    >>> df = make_rdd_data(return_type='DataFrame')
-    >>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', 's')
+    >>> data = make_simple_rdd_data(return_type='DataFrame')
+    >>> columns = ["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])]
+    >>> df = pd.DataFrame(np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=columns)
+    >>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', score_col='score')
     >>> # initialization from np.ndarray
-    >>> (x, y, d, s) = make_rdd_data(return_type='array')
-    >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
+    >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
     """

     def __init__(
@@ -160,10 +163,13 @@ def from_arrays(

         Examples
         --------
+        >>> import numpy as np
+        >>> import pandas as pd
         >>> from doubleml import DoubleMLRDDData
-        >>> from doubleml.rdd.datasets import make_rdd_data
-        >>> (x, y, d, s) = make_rdd_data(return_type='array')
-        >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
+        >>> from doubleml.rdd.datasets import make_simple_rdd_data
+        >>> # initialization from np.ndarray
+        >>> data = make_simple_rdd_data(return_type='DataFrame')
+        >>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
         """
         # Prepare score variable
         score = check_array(score, ensure_2d=False, allow_nd=False)
diff --git a/doubleml/data/ssm_data.py b/doubleml/data/ssm_data.py
index 2785821a..a8797834 100644
--- a/doubleml/data/ssm_data.py
+++ b/doubleml/data/ssm_data.py
@@ -66,9 +66,9 @@ class DoubleMLSSMData(DoubleMLData):
     >>> from doubleml.irm.datasets import make_ssm_data
     >>> # initialization from pandas.DataFrame
     >>> df = make_ssm_data(return_type='DataFrame')
-    >>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', 's')
+    >>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', s_col='s')
     >>> # initialization from np.ndarray
-    >>> (x, y, d, s) = make_ssm_data(return_type='array')
+    >>> (x, y, d, _, s) = make_ssm_data(return_type='array')
     >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
     """

@@ -186,7 +186,7 @@ def from_arrays(
         --------
         >>> from doubleml import DoubleMLSSMData
         >>> from doubleml.irm.datasets import make_ssm_data
-        >>> (x, y, d, s) = make_ssm_data(return_type='array')
+        >>> (x, y, d, _, s) = make_ssm_data(return_type='array')
         >>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
         """
         # Prepare selection variable
diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 1e56ccd8..62c75a42 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -75,8 +75,9 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
     >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd')
     >>> dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m)
     >>> dml_did_obj.fit().summary
-           coef   std err         t     P>|t|     2.5 %   97.5 %
-    d -2.685104  1.798071 -1.493325  0.135352 -6.209257  0.83905
+           coef   std err         t     P>|t|     2.5 %    97.5 %
+    d -2.840718  1.760386 -1.613691  0.106595 -6.291011  0.609575
+
     """

     def __init__(
diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 7cba006e..354fa666 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -74,8 +74,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
     >>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t')
     >>> dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m)
     >>> dml_did_obj.fit().summary
-           coef   std err         t     P>|t|       2.5 %     97.5 %
-    d -6.604603  8.725802 -0.756905  0.449107  -23.706862  10.497655
+          coef   std err         t     P>|t|       2.5 %    97.5 %
+    d -4.9944  7.561785 -0.660479  0.508947  -19.815226  9.826426
     """

     def __init__(
diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py
index 972f6e64..66e7f837 100644
--- a/doubleml/did/did_multi.py
+++ b/doubleml/did/did_multi.py
@@ -134,7 +134,22 @@ class DoubleMLDIDMulti:
     ...     gt_combinations="standard",
     ...     control_group="never_treated",
     ... )
-    >>> print(dml_did_obj.fit())
+    >>> print(dml_did_obj.fit().summary)
+                                      coef   std err  ...     2.5 %    97.5 %
+    ATT(2025-03,2025-01,2025-02) -0.797617  0.459617  ... -1.698450  0.103215
+    ATT(2025-03,2025-02,2025-03)  0.270311  0.456453  ... -0.624320  1.164941
+    ATT(2025-03,2025-02,2025-04)  0.628213  0.895275  ... -1.126494  2.382919
+    ATT(2025-03,2025-02,2025-05)  1.281360  1.327121  ... -1.319750  3.882470
+    ATT(2025-04,2025-01,2025-02) -0.078095  0.407758  ... -0.877287  0.721097
+    ATT(2025-04,2025-02,2025-03)  0.223625  0.479288  ... -0.715764  1.163013
+    ATT(2025-04,2025-03,2025-04)  1.008674  0.455564  ...  0.115785  1.901563
+    ATT(2025-04,2025-03,2025-05)  2.941047  0.832991  ...  1.308415  4.573679
+    ATT(2025-05,2025-01,2025-02) -0.102282  0.454129  ... -0.992359  0.787795
+    ATT(2025-05,2025-02,2025-03)  0.108742  0.547794  ... -0.964914  1.182399
+    ATT(2025-05,2025-03,2025-04)  0.253610  0.422984  ... -0.575423  1.082643
+    ATT(2025-05,2025-04,2025-05)  1.264255  0.487934  ...  0.307923  2.220587
+
+    [12 rows x 6 columns]
     """

     def __init__(
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 4fbf0bd3..8e7de835 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1207,14 +1207,12 @@ def evaluate_learners(self, learners=None, metric=_rmse):
         >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame')
         >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
         >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
-        >>> dml_irm_obj.fit()
+        >>> _ = dml_irm_obj.fit()
         >>> def mae(y_true, y_pred):
-        >>>     subset = np.logical_not(np.isnan(y_true))
-        >>>     return mean_absolute_error(y_true[subset], y_pred[subset])
+        ...     subset = np.logical_not(np.isnan(y_true))
+        ...     return mean_absolute_error(y_true[subset], y_pred[subset])
         >>> dml_irm_obj.evaluate_learners(metric=mae)
-        {'ml_g0': array([[0.85974356]]),
-         'ml_g1': array([[0.85280376]]),
-         'ml_m': array([[0.35365143]])}
+        {'ml_g0': array([[0.88173585]]), 'ml_g1': array([[0.83854057]]), 'ml_m': array([[0.35871235]])}
         """
         # if no learners are provided try to evaluate all learners
         if learners is None:
diff --git a/doubleml/double_ml_sampling_mixins.py b/doubleml/double_ml_sampling_mixins.py
index bd9d0c13..d7d8b2e1 100644
--- a/doubleml/double_ml_sampling_mixins.py
+++ b/doubleml/double_ml_sampling_mixins.py
@@ -91,19 +91,18 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
         >>> ml_m = learner
         >>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5)
         >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
-        >>> # simple sample splitting with two folds and without cross-fitting
-        >>> smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
-        >>> dml_plr_obj.set_sample_splitting(smpls)
         >>> # sample splitting with two folds and cross-fitting
         >>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
-        >>>          ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
-        >>> dml_plr_obj.set_sample_splitting(smpls)
+        ...          ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
+        >>> dml_plr_obj.set_sample_splitting(smpls)  # doctest: +ELLIPSIS
+
        >>> # sample splitting with two folds and repeated cross-fitting with n_rep = 2
         >>> smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
-        >>>           ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
-        >>>          [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
-        >>>           ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
-        >>> dml_plr_obj.set_sample_splitting(smpls)
+        ...           ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+        ...          [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
+        ...           ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
+        >>> dml_plr_obj.set_sample_splitting(smpls)  # doctest: +ELLIPSIS
+
         """
         self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
             all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting
diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py
index dd6e4737..5701c5f2 100644
--- a/doubleml/irm/cvar.py
+++ b/doubleml/irm/cvar.py
@@ -91,8 +91,9 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
     >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
     >>> dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5)
     >>> dml_cvar_obj.fit().summary
-           coef   std err          t         P>|t|     2.5 %    97.5 %
-    d  1.591441  0.095781  16.615498  5.382582e-62  1.403715  1.779167
+           coef   std err         t         P>|t|     2.5 %    97.5 %
+    d  1.588364  0.096616  16.43989  9.909942e-61  1.398999  1.777728
+
     """

     def __init__(
diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py
index fbd33a14..54dcdd6a 100644
--- a/doubleml/irm/iivm.py
+++ b/doubleml/irm/iivm.py
@@ -91,7 +91,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
     >>> dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r)
     >>> dml_iivm_obj.fit().summary
            coef   std err         t     P>|t|     2.5 %    97.5 %
-    d  0.378351  0.190648  1.984551  0.047194  0.004688  0.752015
+    d  0.362398  0.191578  1.891649  0.058538 -0.013088  0.737884

     Notes
     -----
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index f9d8271f..5e2d693b 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -94,7 +94,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
     >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
     >>> dml_irm_obj.fit().summary
            coef   std err         t     P>|t|     2.5 %    97.5 %
-    d  0.414073  0.238529  1.735941  0.082574 -0.053436  0.881581
+    d  0.371972  0.206802  1.798685  0.072069 -0.033353  0.777297

     Notes
     -----
diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py
index e8570b81..7e27b52a 100644
--- a/doubleml/irm/ssm.py
+++ b/doubleml/irm/ssm.py
@@ -67,7 +67,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
     >>> import numpy as np
     >>> import doubleml as dml
     >>> from doubleml import DoubleMLSSMData
-    >>> from sklearn.linear_model import LassoCV, LogisticRegressionCV()
+    >>> from sklearn.linear_model import LassoCV, LogisticRegressionCV
     >>> from sklearn.base import clone
     >>> np.random.seed(3146)
     >>> n = 2000
@@ -88,10 +88,10 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
     >>> ml_g_sim = clone(learner)
     >>> ml_pi_sim = clone(learner_class)
     >>> ml_m_sim = clone(learner_class)
-    >>> obj_dml_sim = DoubleMLS(simul_data, ml_g_sim, ml_pi_sim, ml_m_sim)
+    >>> obj_dml_sim = DoubleMLSSM(simul_data, ml_g_sim, ml_pi_sim, ml_m_sim)
    >>> obj_dml_sim.fit().summary
-          coef   std err         t         P>|t|     2.5 %    97.5 %
-    d  0.49135  0.070534  6.966097  3.258541e-12  0.353105  0.629595
+           coef   std err         t         P>|t|    2.5 %    97.5 %
+    d  0.518517  0.065535  7.912033  2.532202e-15  0.39007  0.646963

     Notes
     -----
diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py
index 385d5c67..f933ce7d 100644
--- a/doubleml/plm/pliv.py
+++ b/doubleml/plm/pliv.py
@@ -74,8 +74,8 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML):
     >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1')
     >>> dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r)
     >>> dml_pliv_obj.fit().summary
-           coef   std err         t         P>|t|     2.5 %    97.5 %
-    d  0.522753  0.082263  6.354688  2.088504e-10  0.361521  0.683984
+           coef   std err         t         P>|t|     2.5 %  97.5 %
+    d  0.511722  0.087184  5.869427  4.373034e-09  0.340844  0.6826

     Notes
     -----
diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py
index db6b5a48..0e29df0d 100644
--- a/doubleml/plm/plr.py
+++ b/doubleml/plm/plr.py
@@ -70,7 +70,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML):
     >>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
     >>> dml_plr_obj.fit().summary
            coef   std err          t         P>|t|     2.5 %    97.5 %
-    d  0.482251  0.040629  11.869585  1.703108e-32  0.402619  0.561883
+    d  0.480691  0.040533  11.859129  1.929729e-32  0.401247  0.560135

     Notes
     -----
diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py
index f9811c9c..0d97ed0a 100644
--- a/doubleml/rdd/rdd.py
+++ b/doubleml/rdd/rdd.py
@@ -86,7 +86,7 @@ class RDFlex:
     ...     x=data_dict["X"],
     ...     y=data_dict["Y"],
     ...     d=data_dict["D"],
-    ...     s=data_dict["score"]
+    ...     score=data_dict["score"]
     ... )
     >>> ml_g = RandomForestRegressor()
     >>> ml_m = RandomForestClassifier()
@@ -94,8 +94,12 @@ class RDFlex:
     >>> print(rdflex_obj.fit())
     Method         Coef.    S.E.   t-stat     P>|t|          95% CI
     -------------------------------------------------------------------------
-    Conventional   0.935   0.220    4.244  2.196e-05     [0.503, 1.367]
-    Robust             -       -    3.635  2.785e-04     [0.418, 1.396]
+    Conventional   0.950   0.225    4.230  2.333e-05     [0.510, 1.391]
+    Robust             -       -    3.653  2.589e-04     [0.431, 1.429]
+    Design Type:          Fuzzy
+    Cutoff:               0
+    First Stage Kernel:   triangular
+    Final Bandwidth:      [0.74746872]
     """

     def __init__(
diff --git a/pyproject.toml b/pyproject.toml
index af7d9aa9..1da863b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,22 @@ dev = [
     "pre-commit>=4.2.0",
 ]

+[project.urls]
+Documentation = "https://docs.doubleml.org"
+Source = "https://github.com/DoubleML/doubleml-for-py"
+"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"
+
+[tool.pytest.ini_options]
+addopts = [
+    "--doctest-modules",
+    "--doctest-ignore-import-errors"
+]
+doctest_optionflags = [
+    "NORMALIZE_WHITESPACE",
+    "IGNORE_EXCEPTION_DETAIL",
+    "ELLIPSIS"
+]
+
 [tool.black]
 line-length = 127
 target-version = ['py39', 'py310', 'py311', 'py312']
@@ -79,8 +95,3 @@ ignore = [
     # isinstance checks
     "E721",
 ]
-
-[project.urls]
-Documentation = "https://docs.doubleml.org"
-Source = "https://github.com/DoubleML/doubleml-for-py"
-"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"
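
Not part of the patch above — a minimal sketch of how the doctest run enabled by the new [tool.pytest.ini_options] table can be exercised locally. It assumes pytest and the package's dependencies are installed and that the snippet is run from the repository root so that the configured addopts and doctest_optionflags are picked up.

    # Sketch only: collect a single module's docstring examples through pytest's doctest support.
    # "--doctest-modules" repeats the addopts entry added in pyproject.toml; passing it explicitly
    # keeps the intent clear even when pyproject.toml is not on the invocation path.
    import pytest

    exit_code = pytest.main(["--doctest-modules", "doubleml/plm/plr.py"])
    print(int(exit_code))  # 0 when all collected doctests pass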