Skip to content

Commit 06eaa14

Browse files
committed
Merge branch 'main' into 0.9.X
2 parents 433d9c7 + e5308be commit 06eaa14

25 files changed

+203
-170
lines changed

.github/workflows/deploy_pkg.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,12 @@ jobs:
2424
- name: Install dependencies
2525
run: |
2626
python -m pip install --upgrade pip
27-
pip install -r requirements.txt
28-
pip install .
27+
pip install --upgrade build
28+
pip install -e .[dev,rdd]
2929
3030
- name: Build package
3131
run: |
32-
pip install wheel
33-
python setup.py sdist bdist_wheel
32+
python -m build
3433
3534
- uses: actions/upload-artifact@v4
3635
with:

.github/workflows/pytest.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,7 @@ jobs:
4444
run: |
4545
python -m pip install --upgrade pip
4646
python -m pip install flake8
47-
pip install -r requirements.txt
48-
pip install -r requirements-dev.txt
49-
pip install .
47+
pip install -e .[dev,rdd]
5048
- name: Lint with flake8
5149
run: |
5250
# stop the build if there are Python syntax errors or undefined names
@@ -59,6 +57,7 @@ jobs:
5957
matrix.config.python-version != '3.9'
6058
run: |
6159
pytest -m ci
60+
pytest -m ci_rdd
6261
6362
- name: Test with pytest and coverage
6463
if: |

CONTRIBUTING.md

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,27 +79,21 @@ $ git fetch upstream
7979
$ git merge upstream/main
8080
```
8181

82-
5. Install the **development dependencies** via
83-
```bash
84-
$ pip install -r requirements.txt
85-
$ pip install -r requirements-dev.txt
86-
```
87-
88-
6. **Install DoubleML in editable mode** (more details can be found
82+
5. **Install DoubleML in editable mode** (more details can be found
8983
[here](https://docs.doubleml.org/stable/intro/install.html#python-building-the-package-from-source))
9084
via
9185
```bash
92-
$ pip install --editable .
86+
$ pip install --editable .[dev,rdd]
9387
```
9488

95-
7. **Develop** your code changes. The changes can be added and pushed via
89+
6. **Develop** your code changes. The changes can be added and pushed via
9690
```bash
9791
$ git add your_new_file your_modified_file
9892
$ git commit -m "A commit message which briefly summarizes the changes made"
9993
$ git push origin my_feature_branch
10094
```
10195

102-
8. Generate a **pull request** from your fork.
96+
7. Generate a **pull request** from your fork.
10397
Please follow our guidelines for pull requests.
10498
When opening the PR you will be guided with a checklist.
10599

doubleml/rdd/_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import importlib
2+
3+
4+
def _is_rdrobust_available():
    """Import and return the optional ``rdrobust`` package.

    Returns
    -------
    module
        The imported ``rdrobust`` module.

    Raises
    ------
    ImportError
        If ``rdrobust`` cannot be imported. The message points to the
        ``DoubleML[rdd]`` extra, and the original import error is attached
        as the explicit cause.
    """
    try:
        return importlib.import_module("rdrobust")
    except ImportError as err:
        msg = (
            "rdrobust is not installed. "
            "Please install it using 'pip install DoubleML[rdd]'")
        # Chain explicitly so the underlying import failure stays visible
        # in the traceback as the direct cause.
        raise ImportError(msg) from err

doubleml/rdd/datasets/simple_dgp.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,47 +5,52 @@
55
def make_simple_rdd_data(n_obs=5000, p=4, fuzzy=True, binary_outcome=False, **kwargs):
66
"""
77
Generates synthetic data for a regression discontinuity design (RDD) analysis.
8+
The data generating process is defined as
89
910
.. math::
10-
Y_0 &= g_0 + g_{cov} + \\epsilon_0 \\
11-
Y_1 &= g_1 + g_{cov} + \\epsilon_1 \\
12-
g_0 &= 0.1 \\cdot \\text{score}^2 \\
13-
g_1 &= \tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 \\
14-
g_{cov} &= \\sum_{i=1}^{\text{dim\\_x}} \text{Polynomial}(X_i) \\
15-
\\epsilon_0, \\epsilon_1 &\\sim \\mathcal{N}(0, 0.2^2)
11+
Y_0 &= g_0 + g_{cov} + \\epsilon_0,
12+
13+
Y_1 &= g_1 + g_{cov} + \\epsilon_1,
14+
15+
g_0 &= 0.1 \\cdot \\text{score}^2,
16+
17+
g_1 &= \\tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 + a
18+
\\sum_{i=1}^{\\text{dim}_x} X_i \\cdot \\text{score},
19+
20+
g_{cov} &= \\sum_{i=1}^{\\text{dim}_x} \\text{Polynomial}(X_i),
21+
22+
with random noise :math:`\\epsilon_0, \\epsilon_1 \\sim \\mathcal{N}(0, 0.2^2)` and :math:`X_i`
23+
being drawn independently from a uniform distribution.
1624
1725
Parameters
1826
----------
1927
n_obs : int
2028
Number of observations to generate. Default is 5000.
2129
2230
p : int
23-
Degree of the polynomial for covariates. Default is 4.
31+
Degree of the polynomial for covariates. Default is 4. If zero, no covariate effect is considered.
2432
2533
fuzzy : bool
2634
If True, generates data for a fuzzy RDD. Default is True.
2735
2836
binary_outcome : bool
29-
If True, generates binary outcomes. Default is False.
37+
If True, generates binary outcomes based on a logistic transformation. Default is False.
3038
3139
**kwargs : Additional keyword arguments.
3240
cutoff : float
3341
The cutoff value for the score. Default is 0.0.
3442
dim_x : int
3543
The number of independent covariates. Default is 3.
3644
a : float
37-
Factor to control interaction of score and covariates to the outcome equation. Default is 0.0.
45+
Factor to control interaction of score and covariates in the outcome equation. Default is 0.0.
3846
tau : float
3947
Parameter to control the true effect in the generated data at the given cutoff. Default is 1.0.
4048
4149
Returns
4250
-------
43-
dict: A dictionary containing the generated data with keys:
44-
'score' (np.ndarray): The running variable.
45-
'X' (np.ndarray): The independent covariates.
46-
'Y0' (np.ndarray): The potential outcomes without treatment.
47-
'Y1' (np.ndarray): The potential outcomes with treatment.
48-
'intended_treatment' (np.ndarray): The intended treatment assignment.
51+
res_dict : dictionary
52+
Dictionary with entries ``score``, ``X``, ``Y``, ``D``, and ``oracle_values``.
53+
The oracle values contain the potential outcomes.
4954
"""
5055

5156
cutoff = kwargs.get('cutoff', 0.0)

doubleml/rdd/rdd.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from collections.abc import Callable
55

66
from scipy.stats import norm
7-
from rdrobust import rdrobust, rdbwselect
87

98
from sklearn.base import clone
109
from sklearn.utils.multiclass import type_of_target
@@ -13,6 +12,10 @@
1312
from doubleml.double_ml import DoubleML
1413
from doubleml.utils.resampling import DoubleMLResampling
1514
from doubleml.utils._checks import _check_resampling_specification, _check_supports_sample_weights
15+
from doubleml.rdd._utils import _is_rdrobust_available
16+
17+
# validate optional rdrobust import
18+
rdrobust = _is_rdrobust_available()
1619

1720

1821
class RDFlex():
@@ -30,7 +33,7 @@ class RDFlex():
3033
defined as :math:`\\eta_0(X) = (g_0^{+}(X) + g_0^{-}(X))/2`.
3134
3235
ml_m : classifier implementing ``fit()`` and ``predict_proba()`` or None
33-
A machine learner implementing ``fit()`` and ``predict_proba()`` methods and support ``sample_weights``(e.g.
36+
A machine learner implementing ``fit()`` and ``predict_proba()`` methods and supporting ``sample_weights`` (e.g.
3437
:py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance functions
3538
:math:`m_0^{\\pm}(X) = E[D|\\text{score}=\\text{cutoff}^{\\pm}, X]`. The adjustment function is then
3639
defined as :math:`\\eta_0(X) = (m_0^{+}(X) + m_0^{-}(X))/2`.
@@ -66,17 +69,29 @@ class RDFlex():
6669
Default is ``cutoff``.
6770
6871
fs_kernel : str
69-
Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov``are supported.
72+
Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov`` are supported.
7073
Default is ``triangular``.
7174
7275
**kwargs : kwargs
7376
Keyword arguments that are not used within RDFlex but passed directly to rdrobust.
7477
7578
Examples
7679
--------
77-
78-
Notes
79-
-----
80+
>>> import numpy as np
81+
>>> import doubleml as dml
82+
>>> from doubleml.rdd.datasets import make_simple_rdd_data
83+
>>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
84+
>>> np.random.seed(123)
85+
>>> data_dict = make_simple_rdd_data(fuzzy=True)
86+
>>> obj_dml_data = dml.DoubleMLData.from_arrays(x=data_dict["X"], y=data_dict["Y"], d=data_dict["D"], s=data_dict["score"])
87+
>>> ml_g = RandomForestRegressor()
88+
>>> ml_m = RandomForestClassifier()
89+
>>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True)
90+
>>> print(rdflex_obj.fit())
91+
Method Coef. S.E. t-stat P>|t| 95% CI
92+
-------------------------------------------------------------------------
93+
Conventional 0.935 0.220 4.244 2.196e-05 [0.503, 1.367]
94+
Robust - - 3.635 2.785e-04 [0.418, 1.396]
8095
8196
"""
8297

@@ -112,9 +127,10 @@ def __init__(self,
112127

113128
if h_fs is None:
114129
fuzzy = self._dml_data.d if self._fuzzy else None
115-
self._h_fs = rdbwselect(y=obj_dml_data.y,
116-
x=self._score,
117-
fuzzy=fuzzy).bws.values.flatten().max()
130+
self._h_fs = rdrobust.rdbwselect(
131+
y=obj_dml_data.y,
132+
x=self._score,
133+
fuzzy=fuzzy).bws.values.flatten().max()
118134
else:
119135
if not isinstance(h_fs, (float)):
120136
raise TypeError("Initial bandwidth 'h_fs' has to be a float. "
@@ -437,11 +453,13 @@ def _update_weights(self):
437453

438454
def _fit_rdd(self, h=None, b=None):
439455
if self.fuzzy:
440-
rdd_res = rdrobust(y=self._M_Y[:, self._i_rep], x=self._score,
441-
fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs)
456+
rdd_res = rdrobust.rdrobust(
457+
y=self._M_Y[:, self._i_rep], x=self._score,
458+
fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs)
442459
else:
443-
rdd_res = rdrobust(y=self._M_Y[:, self._i_rep], x=self._score,
444-
h=h, b=b, **self.kwargs)
460+
rdd_res = rdrobust.rdrobust(
461+
y=self._M_Y[:, self._i_rep], x=self._score,
462+
h=h, b=b, **self.kwargs)
445463
return rdd_res
446464

447465
def _set_coefs(self, rdd_res, h):

doubleml/rdd/tests/conftest.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
from doubleml import DoubleMLData
88
from doubleml.rdd import RDFlex
99

10-
from rdrobust import rdrobust
11-
1210
from sklearn.dummy import DummyRegressor, DummyClassifier
1311

12+
from doubleml.rdd._utils import _is_rdrobust_available
13+
# validate optional rdrobust import
14+
rdrobust = _is_rdrobust_available()
1415

1516
DATA_SIZE = 500
1617

@@ -37,7 +38,7 @@ def _predict_dummy(data: DoubleMLData, cutoff, alpha, n_rep, p, fs_specification
3738
dml_rdflex.fit(n_iterations=1)
3839
ci_manual = dml_rdflex.confint(level=1-alpha)
3940

40-
rdrobust_model = rdrobust(
41+
rdrobust_model = rdrobust.rdrobust(
4142
y=data.y,
4243
x=data.s,
4344
c=cutoff,

doubleml/rdd/tests/test_rdd_classifier.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@
2424
dml_rdflex = RDFlex(dml_data, ml_g=LogisticRegression(), ml_m=LogisticRegression(), fuzzy=True)
2525

2626

27-
@pytest.mark.ci
27+
@pytest.mark.ci_rdd
2828
def test_rdd_classifier():
2929
dml_rdflex.fit()

doubleml/rdd/tests/test_rdd_classifier_fuzzy.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,37 +62,37 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
6262
)
6363

6464

65-
@pytest.mark.ci
65+
@pytest.mark.ci_rdd
6666
def test_rdd_placebo_coef(predict_placebo):
6767
reference, actual = predict_placebo
6868
assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
6969

7070

71-
@pytest.mark.ci
71+
@pytest.mark.ci_rdd
7272
def test_rdd_nonplacebo_coef(predict_nonplacebo):
7373
reference, actual = predict_nonplacebo
7474
assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
7575

7676

77-
@pytest.mark.ci
77+
@pytest.mark.ci_rdd
7878
def test_rdd_placebo_se(predict_placebo):
7979
reference, actual = predict_placebo
8080
assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
8181

8282

83-
@pytest.mark.ci
83+
@pytest.mark.ci_rdd
8484
def test_rdd_nonplacebo_se(predict_nonplacebo):
8585
reference, actual = predict_nonplacebo
8686
assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
8787

8888

89-
@pytest.mark.ci
89+
@pytest.mark.ci_rdd
9090
def test_rdd_placebo_ci(predict_placebo):
9191
reference, actual = predict_placebo
9292
assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
9393

9494

95-
@pytest.mark.ci
95+
@pytest.mark.ci_rdd
9696
def test_rdd_nonplacebo_ci(predict_nonplacebo):
9797
reference, actual = predict_nonplacebo
9898
assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)

doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,37 +62,37 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
6262
)
6363

6464

65-
@pytest.mark.ci
65+
@pytest.mark.ci_rdd
6666
def test_rdd_placebo_coef(predict_placebo):
6767
reference, actual = predict_placebo
6868
assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
6969

7070

71-
@pytest.mark.ci
71+
@pytest.mark.ci_rdd
7272
def test_rdd_nonplacebo_coef(predict_nonplacebo):
7373
reference, actual = predict_nonplacebo
7474
assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
7575

7676

77-
@pytest.mark.ci
77+
@pytest.mark.ci_rdd
7878
def test_rdd_placebo_se(predict_placebo):
7979
reference, actual = predict_placebo
8080
assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
8181

8282

83-
@pytest.mark.ci
83+
@pytest.mark.ci_rdd
8484
def test_rdd_nonplacebo_se(predict_nonplacebo):
8585
reference, actual = predict_nonplacebo
8686
assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
8787

8888

89-
@pytest.mark.ci
89+
@pytest.mark.ci_rdd
9090
def test_rdd_placebo_ci(predict_placebo):
9191
reference, actual = predict_placebo
9292
assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
9393

9494

95-
@pytest.mark.ci
95+
@pytest.mark.ci_rdd
9696
def test_rdd_nonplacebo_ci(predict_nonplacebo):
9797
reference, actual = predict_nonplacebo
9898
assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)

0 commit comments

Comments
 (0)