DoubleML
diff --git a/.github/workflows/deploy_pkg.yml b/.github/workflows/deploy_pkg.yml
Lines changed: 3 additions & 4 deletions
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
Lines changed: 2 additions & 3 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 4 additions & 10 deletions
diff --git a/doubleml/rdd/_utils.py b/doubleml/rdd/_utils.py
Lines changed: 12 additions & 0 deletions
diff --git a/doubleml/rdd/datasets/simple_dgp.py b/doubleml/rdd/datasets/simple_dgp.py
Lines changed: 20 additions & 15 deletions
diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py
Lines changed: 31 additions & 13 deletions
diff --git a/doubleml/rdd/tests/conftest.py b/doubleml/rdd/tests/conftest.py
Lines changed: 4 additions & 3 deletions
diff --git a/doubleml/rdd/tests/test_rdd_classifier.py b/doubleml/rdd/tests/test_rdd_classifier.py
Lines changed: 1 addition & 1 deletion
diff --git a/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py b/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py
Lines changed: 6 additions & 6 deletions
diff --git a/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py b/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py
Lines changed: 6 additions & 6 deletions
@@ -24,13 +24,12 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
-        pip install .
+        pip install --upgrade build
+        pip install -e .[dev,rdd]
 
     - name: Build package
       run: |
-        pip install wheel
-        python setup.py sdist bdist_wheel
+        python -m build
 
     - uses: actions/upload-artifact@v4
       with:
 
@@ -44,9 +44,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install flake8
-        pip install -r requirements.txt
-        pip install -r requirements-dev.txt
-        pip install .
+        pip install -e .[dev,rdd]
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
@@ -59,6 +57,7 @@ jobs:
         matrix.config.python-version != '3.9'
       run: |
         pytest -m ci
+        pytest -m ci_rdd
 
     - name: Test with pytest and coverage
       if: |
 
@@ -79,27 +79,21 @@ $ git fetch upstream
 $ git merge upstream/main
 ```
 
-5. Install the **development dependencies** via
-```bash
-$ pip install -r requirements.txt
-$ pip install -r requirements-dev.txt
-```
-
-6. **Install DoubleML in editable mode** (more details can be found
+5. **Install DoubleML in editable mode** (more details can be found
 [here](https://docs.doubleml.org/stable/intro/install.html#python-building-the-package-from-source))
 via 
 ```bash
-$ pip install --editable .
+$ pip install --editable .[dev,rdd]
 ```
 
-7. **Develop** your code changes. The changes can be added and pushed via
+6. **Develop** your code changes. The changes can be added and pushed via
 ```bash
 $ git add your_new_file your_modified_file
 $ git commit -m "A commit message which briefly summarizes the changes made"
 $ git push origin my_feature_branch
 ```
 
-8. Generate a **pull request** from your fork.
+7. Generate a **pull request** from your fork.
 Please follow our guidelines for pull requests.
 When opening the PR you will be guided with a checklist.
 
 
@@ -0,0 +1,12 @@
+import importlib
+
+
+def _is_rdrobust_available():
+    try:
+        rdrobust = importlib.import_module("rdrobust")
+        return rdrobust
+    except ImportError:
+        msg = (
+            "rdrobust is not installed. "
+            "Please install it using 'pip install DoubleML[rdd]'")
+        raise ImportError(msg)
@@ -5,47 +5,52 @@
 def make_simple_rdd_data(n_obs=5000, p=4, fuzzy=True, binary_outcome=False, **kwargs):
     """
     Generates synthetic data for a regression discontinuity design (RDD) analysis.
+    The data generating process is defined as
 
     .. math::
-        Y_0 &= g_0 + g_{cov} + \\epsilon_0 \\
-        Y_1 &= g_1 + g_{cov} + \\epsilon_1 \\
-        g_0 &= 0.1 \\cdot \\text{score}^2 \\
-        g_1 &= \tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 \\
-        g_{cov} &= \\sum_{i=1}^{\text{dim\\_x}} \text{Polynomial}(X_i) \\
-        \\epsilon_0, \\epsilon_1 &\\sim \\mathcal{N}(0, 0.2^2)
+        Y_0 &= g_0 + g_{cov} + \\epsilon_0,
+
+        Y_1 &= g_1 + g_{cov} + \\epsilon_1,
+
+        g_0 &= 0.1 \\cdot \\text{score}^2,
+
+        g_1 &= \\tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 + a
+        \\sum_{i=1}^{\\text{dim}_x} X_i \\cdot \\text{score},
+
+        g_{cov} &= \\sum_{i=1}^{\\text{dim}_x} \\text{Polynomial}(X_i),
+
+    with random noise :math:`\\epsilon_0, \\epsilon_1 \\sim \\mathcal{N}(0, 0.2^2)` and :math:`X_i`
+    being drawn independently from a uniform distribution.
 
     Parameters
     ----------
     n_obs : int
         Number of observations to generate. Default is 5000.
 
     p : int
-        Degree of the polynomial for covariates. Default is 4.
+        Degree of the polynomial for covariates. Default is 4. If zero, no covariate effect is considered.
 
     fuzzy : bool
         If True, generates data for a fuzzy RDD. Default is True.
 
     binary_outcome : bool
-        If True, generates binary outcomes. Default is False.
+        If True, generates binary outcomes based on a logistic transformation. Default is False.
 
     **kwargs : Additional keyword arguments.
         cutoff : float
             The cutoff value for the score. Default is 0.0.
         dim_x : int
             The number of independent covariates. Default is 3.
         a : float
-            Factor to control interaction of score and covariates to the outcome equation. Default is 0.0.
+            Factor to control interaction of score and covariates in the outcome equation. Default is 0.0.
         tau : float
             Parameter to control the true effect in the generated data at the given cutoff. Default is 1.0.
 
     Returns
     -------
-    dict: A dictionary containing the generated data with keys:
-        'score' (np.ndarray): The running variable.
-        'X' (np.ndarray): The independent covariates.
-        'Y0' (np.ndarray): The potential outcomes without treatment.
-        'Y1' (np.ndarray): The potential outcomes with treatment.
-        'intended_treatment' (np.ndarray): The intended treatment assignment.
+    res_dict : dictionary
+        Dictionary with entries ``score``, ``X``, ``Y``, ``D``, and ``oracle_values``.
+        The oracle values contain the potential outcomes.
     """
 
     cutoff = kwargs.get('cutoff', 0.0)
 
@@ -4,7 +4,6 @@
 from collections.abc import Callable
 
 from scipy.stats import norm
-from rdrobust import rdrobust, rdbwselect
 
 from sklearn.base import clone
 from sklearn.utils.multiclass import type_of_target
@@ -13,6 +12,10 @@
 from doubleml.double_ml import DoubleML
 from doubleml.utils.resampling import DoubleMLResampling
 from doubleml.utils._checks import _check_resampling_specification, _check_supports_sample_weights
+from doubleml.rdd._utils import _is_rdrobust_available
+
+# validate optional rdrobust import
+rdrobust = _is_rdrobust_available()
 
 
 class RDFlex():
@@ -30,7 +33,7 @@ class RDFlex():
         defined as :math:`\\eta_0(X) = (g_0^{+}(X) + g_0^{-}(X))/2`.
 
     ml_m : classifier implementing ``fit()`` and ``predict_proba()`` or None
-        A machine learner implementing ``fit()`` and ``predict_proba()`` methods and support ``sample_weights``(e.g.
+        A machine learner implementing ``fit()`` and ``predict_proba()`` methods and supporting ``sample_weights`` (e.g.
         :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance functions
         :math:`m_0^{\\pm}(X) = E[D|\\text{score}=\\text{cutoff}^{\\pm}, X]`. The adjustment function is then
         defined as :math:`\\eta_0(X) = (m_0^{+}(X) + m_0^{-}(X))/2`.
@@ -66,17 +69,29 @@ class RDFlex():
         Default is ``cutoff``.
 
     fs_kernel : str
-        Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov``are supported.
+        Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov`` are supported.
         Default is ``triangular``.
 
     **kwargs : kwargs
         Key-worded arguments that are not used within RDFlex but directly handed to rdrobust.
 
     Examples
     --------
-
-    Notes
-    -----
+    >>> import numpy as np
+    >>> import doubleml as dml
+    >>> from doubleml.rdd.datasets import make_simple_rdd_data
+    >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+    >>> np.random.seed(123)
+    >>> data_dict = make_simple_rdd_data(fuzzy=True)
+    >>> obj_dml_data = dml.DoubleMLData.from_arrays(x=data_dict["X"], y=data_dict["Y"], d=data_dict["D"], s=data_dict["score"])
+    >>> ml_g = RandomForestRegressor()
+    >>> ml_m = RandomForestClassifier()
+    >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True)
+    >>> print(rdflex_obj.fit())
+    Method             Coef.     S.E.     t-stat       P>|t|           95% CI
+    -------------------------------------------------------------------------
+    Conventional      0.935     0.220     4.244    2.196e-05  [0.503, 1.367]
+    Robust                 -        -     3.635    2.785e-04  [0.418, 1.396]
 
     """
 
@@ -112,9 +127,10 @@ def __init__(self,
 
         if h_fs is None:
             fuzzy = self._dml_data.d if self._fuzzy else None
-            self._h_fs = rdbwselect(y=obj_dml_data.y,
-                                    x=self._score,
-                                    fuzzy=fuzzy).bws.values.flatten().max()
+            self._h_fs = rdrobust.rdbwselect(
+                y=obj_dml_data.y,
+                x=self._score,
+                fuzzy=fuzzy).bws.values.flatten().max()
         else:
             if not isinstance(h_fs, (float)):
                 raise TypeError("Initial bandwidth 'h_fs' has to be a float. "
@@ -437,11 +453,13 @@ def _update_weights(self):
 
     def _fit_rdd(self, h=None, b=None):
         if self.fuzzy:
-            rdd_res = rdrobust(y=self._M_Y[:, self._i_rep], x=self._score,
-                               fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs)
+            rdd_res = rdrobust.rdrobust(
+                y=self._M_Y[:, self._i_rep], x=self._score,
+                fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs)
         else:
-            rdd_res = rdrobust(y=self._M_Y[:, self._i_rep], x=self._score,
-                               h=h, b=b, **self.kwargs)
+            rdd_res = rdrobust.rdrobust(
+                y=self._M_Y[:, self._i_rep], x=self._score,
+                h=h, b=b, **self.kwargs)
         return rdd_res
 
     def _set_coefs(self, rdd_res, h):
 
@@ -7,10 +7,11 @@
 from doubleml import DoubleMLData
 from doubleml.rdd import RDFlex
 
-from rdrobust import rdrobust
-
 from sklearn.dummy import DummyRegressor, DummyClassifier
 
+from doubleml.rdd._utils import _is_rdrobust_available
+# validate optional rdrobust import
+rdrobust = _is_rdrobust_available()
 
 DATA_SIZE = 500
 
@@ -37,7 +38,7 @@ def _predict_dummy(data: DoubleMLData, cutoff, alpha, n_rep, p, fs_specification
         dml_rdflex.fit(n_iterations=1)
         ci_manual = dml_rdflex.confint(level=1-alpha)
 
-        rdrobust_model = rdrobust(
+        rdrobust_model = rdrobust.rdrobust(
             y=data.y,
             x=data.s,
             c=cutoff,
 
@@ -24,6 +24,6 @@
 dml_rdflex = RDFlex(dml_data, ml_g=LogisticRegression(), ml_m=LogisticRegression(), fuzzy=True)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_classifier():
     dml_rdflex.fit()
@@ -62,37 +62,37 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
     )
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_coef(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_coef(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_se(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_se(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_ci(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_ci(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
@@ -62,37 +62,37 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
     )
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_coef(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_coef(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_se(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_se(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_placebo_ci(predict_placebo):
     reference, actual = predict_placebo
     assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
 
 
-@pytest.mark.ci
+@pytest.mark.ci_rdd
 def test_rdd_nonplacebo_ci(predict_nonplacebo):
     reference, actual = predict_nonplacebo
     assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)