Skip to content
2 changes: 1 addition & 1 deletion doubleml/data/did_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class DoubleMLDIDData(DoubleMLData):
>>> from doubleml.did.datasets import make_did_SZ2020
>>> # initialization from pandas.DataFrame
>>> df = make_did_SZ2020(return_type='DataFrame')
>>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd', 't')
>>> obj_dml_data_from_df = DoubleMLDIDData(df, 'y', 'd')
>>> # initialization from np.ndarray
>>> (x, y, d, t) = make_did_SZ2020(return_type='array')
>>> obj_dml_data_from_array = DoubleMLDIDData.from_arrays(x, y, d, t=t)
Expand Down
22 changes: 14 additions & 8 deletions doubleml/data/rdd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,17 @@ class DoubleMLRDDData(DoubleMLData):

Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from doubleml import DoubleMLRDDData
>>> from doubleml.rdd.datasets import make_rdd_data
>>> from doubleml.rdd.datasets import make_simple_rdd_data
>>> # initialization from pandas.DataFrame
>>> df = make_rdd_data(return_type='DataFrame')
>>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', 's')
>>> data = make_simple_rdd_data()
>>> columns = ["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])]
>>> df = pd.DataFrame(np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=columns)
>>> obj_dml_data_from_df = DoubleMLRDDData(df, 'y', 'd', score_col='score')
>>> # initialization from np.ndarray
>>> (x, y, d, s) = make_rdd_data(return_type='array')
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
"""

def __init__(
Expand Down Expand Up @@ -160,10 +163,13 @@ def from_arrays(

Examples
--------
>>> import numpy as np
>>> import pandas as pd
>>> from doubleml import DoubleMLRDDData
>>> from doubleml.rdd.datasets import make_rdd_data
>>> (x, y, d, s) = make_rdd_data(return_type='array')
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(x, y, d, s=s)
>>> from doubleml.rdd.datasets import make_simple_rdd_data
>>> # initialization from np.ndarray
>>> data = make_simple_rdd_data()
>>> obj_dml_data_from_array = DoubleMLRDDData.from_arrays(data["X"], data["Y"], data["D"], score=data["score"])
"""
# Prepare score variable
score = check_array(score, ensure_2d=False, allow_nd=False)
Expand Down
6 changes: 3 additions & 3 deletions doubleml/data/ssm_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ class DoubleMLSSMData(DoubleMLData):
>>> from doubleml.irm.datasets import make_ssm_data
>>> # initialization from pandas.DataFrame
>>> df = make_ssm_data(return_type='DataFrame')
>>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', 's')
>>> obj_dml_data_from_df = DoubleMLSSMData(df, 'y', 'd', s_col='s')
>>> # initialization from np.ndarray
>>> (x, y, d, s) = make_ssm_data(return_type='array')
>>> (x, y, d, _, s) = make_ssm_data(return_type='array')
>>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
"""

Expand Down Expand Up @@ -186,7 +186,7 @@ def from_arrays(
--------
>>> from doubleml import DoubleMLSSMData
>>> from doubleml.irm.datasets import make_ssm_data
>>> (x, y, d, s) = make_ssm_data(return_type='array')
>>> (x, y, d, _, s) = make_ssm_data(return_type='array')
>>> obj_dml_data_from_array = DoubleMLSSMData.from_arrays(x, y, d, s=s)
"""
# Prepare selection variable
Expand Down
5 changes: 3 additions & 2 deletions doubleml/did/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
>>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd')
>>> dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m)
>>> dml_did_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d -2.685104 1.798071 -1.493325 0.135352 -6.209257 0.83905
coef std err t P>|t| 2.5 % 97.5 %
d -2.840718 1.760386 -1.613691 0.106595 -6.291011 0.609575

"""

def __init__(
Expand Down
4 changes: 2 additions & 2 deletions doubleml/did/did_cs.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
>>> obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t')
>>> dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m)
>>> dml_did_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d -6.604603 8.725802 -0.756905 0.449107 -23.706862 10.497655
coef std err t P>|t| 2.5 % 97.5 %
d -4.9944 7.561785 -0.660479 0.508947 -19.815226 9.826426
"""

def __init__(
Expand Down
17 changes: 16 additions & 1 deletion doubleml/did/did_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,22 @@ class DoubleMLDIDMulti:
... gt_combinations="standard",
... control_group="never_treated",
... )
>>> print(dml_did_obj.fit())
>>> print(dml_did_obj.fit().summary)
coef std err ... 2.5 % 97.5 %
ATT(2025-03,2025-01,2025-02) -0.797617 0.459617 ... -1.698450 0.103215
ATT(2025-03,2025-02,2025-03) 0.270311 0.456453 ... -0.624320 1.164941
ATT(2025-03,2025-02,2025-04) 0.628213 0.895275 ... -1.126494 2.382919
ATT(2025-03,2025-02,2025-05) 1.281360 1.327121 ... -1.319750 3.882470
ATT(2025-04,2025-01,2025-02) -0.078095 0.407758 ... -0.877287 0.721097
ATT(2025-04,2025-02,2025-03) 0.223625 0.479288 ... -0.715764 1.163013
ATT(2025-04,2025-03,2025-04) 1.008674 0.455564 ... 0.115785 1.901563
ATT(2025-04,2025-03,2025-05) 2.941047 0.832991 ... 1.308415 4.573679
ATT(2025-05,2025-01,2025-02) -0.102282 0.454129 ... -0.992359 0.787795
ATT(2025-05,2025-02,2025-03) 0.108742 0.547794 ... -0.964914 1.182399
ATT(2025-05,2025-03,2025-04) 0.253610 0.422984 ... -0.575423 1.082643
ATT(2025-05,2025-04,2025-05) 1.264255 0.487934 ... 0.307923 2.220587
<BLANKLINE>
[12 rows x 6 columns]
"""

def __init__(
Expand Down
10 changes: 4 additions & 6 deletions doubleml/double_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,14 +1207,12 @@ def evaluate_learners(self, learners=None, metric=_rmse):
>>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame')
>>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
>>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
>>> dml_irm_obj.fit()
>>> _ = dml_irm_obj.fit()
>>> def mae(y_true, y_pred):
>>> subset = np.logical_not(np.isnan(y_true))
>>> return mean_absolute_error(y_true[subset], y_pred[subset])
... subset = np.logical_not(np.isnan(y_true))
... return mean_absolute_error(y_true[subset], y_pred[subset])
>>> dml_irm_obj.evaluate_learners(metric=mae)
{'ml_g0': array([[0.85974356]]),
'ml_g1': array([[0.85280376]]),
'ml_m': array([[0.35365143]])}
{'ml_g0': array([[0.88173585]]), 'ml_g1': array([[0.83854057]]), 'ml_m': array([[0.35871235]])}
"""
# if no learners are provided try to evaluate all learners
if learners is None:
Expand Down
17 changes: 8 additions & 9 deletions doubleml/double_ml_sampling_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,18 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
>>> ml_m = learner
>>> obj_dml_data = make_plr_CCDDHNR2018(n_obs=10, alpha=0.5)
>>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
>>> # simple sample splitting with two folds and without cross-fitting
>>> smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
>>> dml_plr_obj.set_sample_splitting(smpls)
>>> # sample splitting with two folds and cross-fitting
>>> smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
>>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
>>> dml_plr_obj.set_sample_splitting(smpls)
... ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
>>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS
<doubleml.plm.plr.DoubleMLPLR object at 0x...>
>>> # sample splitting with two folds and repeated cross-fitting with n_rep = 2
>>> smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
>>> ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
>>> [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
>>> ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
>>> dml_plr_obj.set_sample_splitting(smpls)
... ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
... [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
... ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
>>> dml_plr_obj.set_sample_splitting(smpls) # doctest: +ELLIPSIS
<doubleml.plm.plr.DoubleMLPLR object at 0x...>
"""
self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting
Expand Down
5 changes: 3 additions & 2 deletions doubleml/irm/cvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
>>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
>>> dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data, ml_g, ml_m, treatment=1, quantile=0.5)
>>> dml_cvar_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 1.591441 0.095781 16.615498 5.382582e-62 1.403715 1.779167
coef std err t P>|t| 2.5 % 97.5 %
d 1.588364 0.096616 16.43989 9.909942e-61 1.398999 1.777728

"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/iivm.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
>>> dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r)
>>> dml_iivm_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 0.378351 0.190648 1.984551 0.047194 0.004688 0.752015
d 0.362398 0.191578 1.891649 0.058538 -0.013088 0.737884

Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/irm.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
>>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
>>> dml_irm_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 0.414073 0.238529 1.735941 0.082574 -0.053436 0.881581
d 0.371972 0.206802 1.798685 0.072069 -0.033353 0.777297

Notes
-----
Expand Down
8 changes: 4 additions & 4 deletions doubleml/irm/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
>>> import numpy as np
>>> import doubleml as dml
>>> from doubleml import DoubleMLSSMData
>>> from sklearn.linear_model import LassoCV, LogisticRegressionCV()
>>> from sklearn.linear_model import LassoCV, LogisticRegressionCV
>>> from sklearn.base import clone
>>> np.random.seed(3146)
>>> n = 2000
Expand All @@ -88,10 +88,10 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
>>> ml_g_sim = clone(learner)
>>> ml_pi_sim = clone(learner_class)
>>> ml_m_sim = clone(learner_class)
>>> obj_dml_sim = DoubleMLS(simul_data, ml_g_sim, ml_pi_sim, ml_m_sim)
>>> obj_dml_sim = DoubleMLSSM(simul_data, ml_g_sim, ml_pi_sim, ml_m_sim)
>>> obj_dml_sim.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 0.49135 0.070534 6.966097 3.258541e-12 0.353105 0.629595
coef std err t P>|t| 2.5 % 97.5 %
d 0.518517 0.065535 7.912033 2.532202e-15 0.39007 0.646963

Notes
-----
Expand Down
4 changes: 2 additions & 2 deletions doubleml/plm/pliv.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML):
>>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1')
>>> dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r)
>>> dml_pliv_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 0.522753 0.082263 6.354688 2.088504e-10 0.361521 0.683984
coef std err t P>|t| 2.5 % 97.5 %
d 0.511722 0.087184 5.869427 4.373034e-09 0.340844 0.6826

Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion doubleml/plm/plr.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML):
>>> dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m)
>>> dml_plr_obj.fit().summary
coef std err t P>|t| 2.5 % 97.5 %
d 0.482251 0.040629 11.869585 1.703108e-32 0.402619 0.561883
d 0.480691 0.040533 11.859129 1.929729e-32 0.401247 0.560135

Notes
-----
Expand Down
10 changes: 7 additions & 3 deletions doubleml/rdd/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,20 @@ class RDFlex:
... x=data_dict["X"],
... y=data_dict["Y"],
... d=data_dict["D"],
... s=data_dict["score"]
... score=data_dict["score"]
... )
>>> ml_g = RandomForestRegressor()
>>> ml_m = RandomForestClassifier()
>>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True)
>>> print(rdflex_obj.fit())
Method Coef. S.E. t-stat P>|t| 95% CI
-------------------------------------------------------------------------
Conventional 0.935 0.220 4.244 2.196e-05 [0.503, 1.367]
Robust - - 3.635 2.785e-04 [0.418, 1.396]
Conventional 0.950 0.225 4.230 2.333e-05 [0.510, 1.391]
Robust - - 3.653 2.589e-04 [0.431, 1.429]
Design Type: Fuzzy
Cutoff: 0
First Stage Kernel: triangular
Final Bandwidth: [0.74746872]

"""

Expand Down
21 changes: 16 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ dev = [
"pre-commit>=4.2.0",
]

[project.urls]
Documentation = "https://docs.doubleml.org"
Source = "https://github.com/DoubleML/doubleml-for-py"
"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"

[tool.pytest.ini_options]
addopts = [
"--doctest-modules",
"--doctest-ignore-import-errors"
]
doctest_optionflags = [
"NORMALIZE_WHITESPACE",
"IGNORE_EXCEPTION_DETAIL",
"ELLIPSIS"
]

[tool.black]
line-length = 127
target-version = ['py39', 'py310', 'py311', 'py312']
Expand Down Expand Up @@ -79,8 +95,3 @@ ignore = [
# isinstance checks
"E721",
]

[project.urls]
Documentation = "https://docs.doubleml.org"
Source = "https://github.com/DoubleML/doubleml-for-py"
"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"