DoubleML · OliverSchacht · Jan 7, 2025 · Jan 6, 2025 · Jan 6, 2025 · Jan 6, 2025
diff --git a/doubleml/rdd/datasets/simple_dgp.py b/doubleml/rdd/datasets/simple_dgp.py
@@ -5,47 +5,52 @@
 def make_simple_rdd_data(n_obs=5000, p=4, fuzzy=True, binary_outcome=False, **kwargs):
     """
     Generates synthetic data for a regression discontinuity design (RDD) analysis.
+    The data generating process is defined as
 
     .. math::
-        Y_0 &= g_0 + g_{cov} + \\epsilon_0 \\
-        Y_1 &= g_1 + g_{cov} + \\epsilon_1 \\
-        g_0 &= 0.1 \\cdot \\text{score}^2 \\
-        g_1 &= \tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 \\
-        g_{cov} &= \\sum_{i=1}^{\text{dim\\_x}} \text{Polynomial}(X_i) \\
-        \\epsilon_0, \\epsilon_1 &\\sim \\mathcal{N}(0, 0.2^2)
+        Y_0 &= g_0 + g_{cov} + \\epsilon_0,
+
+        Y_1 &= g_1 + g_{cov} + \\epsilon_1,
+
+        g_0 &= 0.1 \\cdot \\text{score}^2,
+
+        g_1 &= \\tau + 0.1 \\cdot \\text{score}^2 - 0.5 \\cdot \\text{score}^2 + a
+        \\sum_{i=1}^{\\text{dim}_x} X_i \\cdot \\text{score},
+
+        g_{cov} &= \\sum_{i=1}^{\\text{dim}_x} \\text{Polynomial}(X_i),
+
+    with random noise :math:`\\epsilon_0, \\epsilon_1 \\sim \\mathcal{N}(0, 0.2^2)` and :math:`X_i`
+    being drawn independently from a uniform distribution.
 
     Parameters
     ----------
     n_obs : int
         Number of observations to generate. Default is 5000.
 
     p : int
-        Degree of the polynomial for covariates. Default is 4.
+        Degree of the polynomial for covariates. Default is 4. If zero, no covariate effect is considered.
 
     fuzzy : bool
         If True, generates data for a fuzzy RDD. Default is True.
 
     binary_outcome : bool
-        If True, generates binary outcomes. Default is False.
+        If True, generates binary outcomes based on a logistic transformation. Default is False.
 
     **kwargs : Additional keyword arguments.
         cutoff : float
             The cutoff value for the score. Default is 0.0.
         dim_x : int
             The number of independent covariates. Default is 3.
         a : float
-            Factor to control interaction of score and covariates to the outcome equation. Default is 0.0.
+            Factor to control interaction of score and covariates in the outcome equation. Default is 0.0.
         tau : float
             Parameter to control the true effect in the generated data at the given cutoff. Default is 1.0.
 
     Returns
     -------
-    dict: A dictionary containing the generated data with keys:
-        'score' (np.ndarray): The running variable.
-        'X' (np.ndarray): The independent covariates.
-        'Y0' (np.ndarray): The potential outcomes without treatment.
-        'Y1' (np.ndarray): The potential outcomes with treatment.
-        'intended_treatment' (np.ndarray): The intended treatment assignment.
+    res_dict : dictionary
+        Dictionary with entries ``score``, ``X``, ``Y``, ``D``, and ``oracle_values``.
+        The oracle values contain the potential outcomes.
     """
 
     cutoff = kwargs.get('cutoff', 0.0)

diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py
@@ -30,7 +30,7 @@ class RDFlex():
         defined as :math:`\\eta_0(X) = (g_0^{+}(X) + g_0^{-}(X))/2`.
 
     ml_m : classifier implementing ``fit()`` and ``predict_proba()`` or None
-        A machine learner implementing ``fit()`` and ``predict_proba()`` methods and support ``sample_weights``(e.g.
+        A machine learner implementing ``fit()`` and ``predict_proba()`` methods and supporting ``sample_weights`` (e.g.
         :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance functions
         :math:`m_0^{\\pm}(X) = E[D|\\text{score}=\\text{cutoff}^{\\pm}, X]`. The adjustment function is then
         defined as :math:`\\eta_0(X) = (m_0^{+}(X) + m_0^{-}(X))/2`.
@@ -66,17 +66,29 @@ class RDFlex():
         Default is ``cutoff``.
 
     fs_kernel : str
-        Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov``are supported.
+        Kernel for the first stage estimation. ``uniform``, ``triangular`` and ``epanechnikov`` are supported.
         Default is ``triangular``.
 
     **kwargs : kwargs
         Key-worded arguments that are not used within RDFlex but directly handed to rdrobust.
 
     Examples
     --------
-
-    Notes
-    -----
+    >>> import numpy as np
+    >>> import doubleml as dml
+    >>> from doubleml.rdd.datasets import make_simple_rdd_data
+    >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+    >>> np.random.seed(123)
+    >>> data_dict = make_simple_rdd_data(fuzzy=True)
+    >>> obj_dml_data = dml.DoubleMLData.from_arrays(x=data_dict["X"], y=data_dict["Y"], d=data_dict["D"], s=data_dict["score"])
+    >>> ml_g = RandomForestRegressor()
+    >>> ml_m = RandomForestClassifier()
+    >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True)
+    >>> print(rdflex_obj.fit())
+    Method             Coef.     S.E.     t-stat       P>|t|           95% CI
+    -------------------------------------------------------------------------
+    Conventional      0.935     0.220     4.244    2.196e-05  [0.503, 1.367]
+    Robust                 -        -     3.635    2.785e-04  [0.418, 1.396]
 
     """
 

diff --git a/setup.py b/setup.py
@@ -29,6 +29,8 @@
         'scikit-learn>=1.4.0,<1.6.0',
         'statsmodels',
         'plotly',
+        'matplotlib',
+        'rdrobust',
     ],
     python_requires=">=3.9",
     classifiers=[