diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index baa6d6250..3e5321ea7 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -23,12 +23,12 @@ body:
     attributes:
       label: Minimum reproducible code snippet
       description: |
         Please provide a short reproducible code snippet. Example:
 
         ```python
         import numpy as np
         import doubleml as dml
-        from doubleml.datasets import make_plr_CCDDHNR2018
+        from doubleml.plm.datasets import make_plr_CCDDHNR2018
         from sklearn.ensemble import RandomForestRegressor
         from sklearn.base import clone
         np.random.seed(3141)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4809c62a6..a614dd73b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,7 +15,7 @@ To submit a **bug report**, you can use our
 ```python
 import numpy as np
 import doubleml as dml
-from doubleml.datasets import make_plr_CCDDHNR2018
+from doubleml.plm.datasets import make_plr_CCDDHNR2018
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.base import clone
 np.random.seed(3141)
diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py
index 318508e98..7a1142207 100644
--- a/doubleml/data/base_data.py
+++ b/doubleml/data/base_data.py
@@ -135,9 +135,9 @@ class DoubleMLData(DoubleMLBaseData):
         Default is ``True``.
 
     Examples
     --------
     >>> from doubleml import DoubleMLData
-    >>> from doubleml.datasets import make_plr_CCDDHNR2018
+    >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018
     >>> # initialization from pandas.DataFrame
     >>> df = make_plr_CCDDHNR2018(return_type='DataFrame')
     >>> obj_dml_data_from_df = DoubleMLData(df, 'y', 'd')
@@ -266,9 +266,9 @@ def from_arrays(
             Default is ``True``.
 
         Examples
        --------
         >>> from doubleml import DoubleMLData
-        >>> from doubleml.datasets import make_plr_CCDDHNR2018
+        >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018
         >>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array')
         >>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d)
         """
diff --git a/doubleml/data/cluster_data.py b/doubleml/data/cluster_data.py
index 658ab0cca..89947b736 100644
--- a/doubleml/data/cluster_data.py
+++ b/doubleml/data/cluster_data.py
@@ -61,9 +61,9 @@ class DoubleMLClusterData(DoubleMLData):
         Default is ``True``.
 
     Examples
     --------
     >>> from doubleml import DoubleMLClusterData
-    >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+    >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021
     >>> # initialization from pandas.DataFrame
     >>> df = make_pliv_multiway_cluster_CKMS2021(return_type='DataFrame')
     >>> obj_dml_data_from_df = DoubleMLClusterData(df, 'Y', 'D', ['cluster_var_i', 'cluster_var_j'], z_cols='Z')
@@ -172,9 +172,9 @@ def from_arrays(
             Default is ``True``.
 
         Examples
         --------
         >>> from doubleml import DoubleMLClusterData
-        >>> from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+        >>> from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021
         >>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array')
         >>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z)
         """
diff --git a/doubleml/data/tests/conftest.py b/doubleml/data/tests/conftest.py
index 6960b58a7..fcefabce5 100644
--- a/doubleml/data/tests/conftest.py
+++ b/doubleml/data/tests/conftest.py
@@ -2,7 +2,8 @@
 import pandas as pd
 import pytest
 
-from doubleml.datasets import make_irm_data, make_plr_turrell2018
+from doubleml.irm.datasets import make_irm_data
+from doubleml.plm.datasets import make_plr_turrell2018
 
 
 @pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)])
diff --git a/doubleml/data/tests/test_cluster_data.py b/doubleml/data/tests/test_cluster_data.py
index e95dfa033..b02a32752 100644
--- a/doubleml/data/tests/test_cluster_data.py
+++ b/doubleml/data/tests/test_cluster_data.py
@@ -3,7 +3,7 @@
 import pytest
 
 from doubleml import DoubleMLClusterData
-from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
+from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
 
 
 @pytest.mark.ci
diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py
index 7cf394b54..a2ada74bb 100644
--- a/doubleml/data/tests/test_dml_data.py
+++ b/doubleml/data/tests/test_dml_data.py
@@ -5,12 +5,12 @@
 
 from doubleml import DoubleMLData, DoubleMLDIDCS, DoubleMLPLR, DoubleMLSSM
 from doubleml.data.base_data import DoubleMLBaseData
-from doubleml.datasets import (
+from doubleml.plm.datasets import (
     _make_pliv_data,
     make_pliv_CHS2015,
     make_plr_CCDDHNR2018,
-    make_ssm_data,
 )
+from doubleml.irm.datasets import make_ssm_data
 from doubleml.did.datasets import make_did_SZ2020
 
 
diff --git a/doubleml/datasets.py b/doubleml/datasets.py
deleted file mode 100644
index 0dcd33c7c..000000000
--- a/doubleml/datasets.py
+++ /dev/null
@@ -1,1620 +0,0 @@
-import warnings
-
-import numpy as np
-import pandas as pd
-from scipy.linalg import toeplitz
-from scipy.optimize import minimize_scalar
-from sklearn.datasets import make_spd_matrix
-from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
-
-from doubleml.data import DoubleMLClusterData, DoubleMLData
-from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_cluster_data_alias, _get_dml_data_alias
-
-_array_alias = _get_array_alias()
-_data_frame_alias = _get_data_frame_alias()
-_dml_data_alias = _get_dml_data_alias()
-_dml_cluster_data_alias = _get_dml_cluster_data_alias()
-
-
-def fetch_401K(return_type="DoubleMLData", polynomial_features=False):
-    """
-    Data set on financial wealth and 401(k) plan participation.
-
-    Parameters
-    ----------
-    return_type :
-        If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object.
-
-        If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``.
-    polynomial_features :
-        If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)).
-
-    References
-    ----------
-    Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of
-    Econometrics, 113(2): 231-263.
- - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - url = "https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta" - raw_data = pd.read_stata(url) - - y_col = "net_tfa" - d_cols = ["e401"] - x_cols = ["age", "inc", "educ", "fsize", "marr", "twoearn", "db", "pira", "hown"] - - data = raw_data.copy() - - if polynomial_features: - raise NotImplementedError("polynomial_features os not implemented yet for fetch_401K.") - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError("Invalid return_type.") - - -def fetch_bonus(return_type="DoubleMLData", polynomial_features=False): - """ - Data set on the Pennsylvania Reemployment Bonus experiment. - - Parameters - ---------- - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - polynomial_features : - If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). - - References - ---------- - Bilias Y. (2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. - Journal of Applied Econometrics, 15(6): 575-594. - - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - url = "https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat" - raw_data = pd.read_csv(url, sep=r"\s+") - - ind = (raw_data["tg"] == 0) | (raw_data["tg"] == 4) - data = raw_data.copy()[ind] - data.reset_index(inplace=True) - data["tg"] = data["tg"].replace(4, 1) - data["inuidur1"] = np.log(data["inuidur1"]) - - # variable dep as factor (dummy encoding) - dummy_enc = OneHotEncoder(drop="first", categories="auto").fit(data.loc[:, ["dep"]]) - xx = dummy_enc.transform(data.loc[:, ["dep"]]).toarray() - data["dep1"] = xx[:, 0] - data["dep2"] = xx[:, 1] - - y_col = "inuidur1" - d_cols = ["tg"] - x_cols = [ - "female", - "black", - "othrace", - "dep1", - "dep2", - "q2", - "q3", - "q4", - "q5", - "q6", - "agelt35", - "agegt54", - "durable", - "lusd", - "husd", - ] - - if polynomial_features: - poly = PolynomialFeatures(2, include_bias=False) - data_transf = poly.fit_transform(data[x_cols]) - x_cols = list(poly.get_feature_names_out(x_cols)) - - data_transf = pd.DataFrame(data_transf, columns=x_cols) - data = pd.concat((data[[y_col] + d_cols], data_transf), axis=1, sort=False) - - if return_type in _data_frame_alias + _dml_data_alias: - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, y_col, d_cols, x_cols) - else: - raise ValueError("Invalid return_type.") - - -def _g(x): - return np.power(np.sin(x), 2) - - -def _m(x, nu=0.0, gamma=1.0): - return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) - - -def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", **kwargs): - """ - Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1. - The data generating process is defined as - - .. 
math:: - - d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.7^{|j-k|}`. - The nuisance functions are given by - - .. math:: - - m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})}, - - g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - alpha : - The value of the causal parameter. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`. - - References - ---------- - Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), - Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. - doi:`10.1111/ectj.12097 `_. - """ - a_0 = kwargs.get("a_0", 1.0) - a_1 = kwargs.get("a_1", 0.25) - s_1 = kwargs.get("s_1", 1.0) - - b_0 = kwargs.get("b_0", 1.0) - b_1 = kwargs.get("b_1", 0.25) - s_2 = kwargs.get("s_2", 1.0) - - cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - d = ( - a_0 * x[:, 0] - + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) - + s_1 - * np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - y = ( - alpha * d - + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) - + b_1 * x[:, 2] - + s_2 - * np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type="DoubleMLData", **kwargs): - """ - Generates data from a partially linear regression model used in a blog article by Turrell (2018). - The data generating process is defined as - - .. math:: - - d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1), - - y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1), - - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric, - positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`. - :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by - - .. math:: - - m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)}, - - g_0(x_i) &= \\sin(x_i)^2. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. 
- return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\nu=0`, or :math:`\\gamma=1`. - - References - ---------- - Turrell, A. (2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics, - science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/ - `_. - """ - nu = kwargs.get("nu", 0.0) - gamma = kwargs.get("gamma", 1.0) - - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - G = _g(np.dot(x, b)) - M = _m(np.dot(x, b), nu=nu, gamma=gamma) - d = M + np.random.standard_normal( - size=[ - n_obs, - ] - ) - y = ( - np.dot(theta, d) - + G - + np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type="DoubleMLData"): - """ - Generates data from a interactive regression (IRM) model. - The data generating process is defined as - - .. math:: - - d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i - \\sim \\mathcal{U}(0,1), - - y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), - - with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and - :math:`c_d` are given by - - .. math:: - - c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = - \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. - - The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni - et al. (2017). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - R2_d : - The value of the parameter :math:`R_d^2`. - R2_y : - The value of the parameter :math:`R_y^2`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. - - References - ---------- - Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With - High‐Dimensional Data. Econometrica, 85: 233-298. 
- """ - # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see suplement - v = np.random.uniform( - size=[ - n_obs, - ] - ) - zeta = np.random.standard_normal( - size=[ - n_obs, - ] - ) - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) - c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) - c_d = np.sqrt(np.pi**2 / 3.0 * R2_d / ((1 - R2_d) * b_sigma_b)) - - xx = np.exp(np.dot(x, np.multiply(beta, c_d))) - d = 1.0 * ((xx / (1 + xx)) > v) - - y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta - - if return_type in _array_alias: - return x, y, d - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="DoubleMLData"): - """ - Generates data from a interactive IV regression (IIVM) model. - The data generating process is defined as - - .. math:: - - d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, - - y_i &= \\theta d_i + x_i' \\beta + u_i, - - with :math:`Z \\sim \\text{Bernoulli}(0.5)` and - - .. math:: - - \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). - - The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries - :math:`\\beta_j=\\frac{1}{j^2}`. - - The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Gruber and - Klaassen (2020). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - alpha_x : - The value of the parameter :math:`\\alpha_x`. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion - Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. 
- """ - # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 - xx = np.random.multivariate_normal( - np.zeros(2), - np.array([[1.0, 0.3], [0.3, 1.0]]), - size=[ - n_obs, - ], - ) - u = xx[:, 0] - v = xx[:, 1] - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - - z = np.random.binomial( - p=0.5, - n=1, - size=[ - n_obs, - ], - ) - d = 1.0 * (alpha_x * z + v > 0) - - y = d * theta + np.dot(x, beta) + u - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, "z") - else: - raise ValueError("Invalid return_type.") - - -def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type="DoubleMLData"): - b = [1 / k for k in range(1, dim_x + 1)] - sigma = make_spd_matrix(dim_x) - - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - G = _g(np.dot(x, b)) - # instrument - z = _m(np.dot(x, b)) + np.random.standard_normal( - size=[ - n_obs, - ] - ) - # treatment - M = _m(gamma_z * z + np.dot(x, b)) - d = M + np.random.standard_normal( - size=[ - n_obs, - ] - ) - y = ( - np.dot(theta, d) - + G - + np.random.standard_normal( - size=[ - n_obs, - ] - ) - ) - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, "z") - else: - raise ValueError("Invalid return_type.") - - -def make_pliv_CHS2015(n_obs, alpha=1.0, dim_x=200, dim_z=150, return_type="DoubleMLData"): - """ - Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). - The data generating process is defined as - - .. math:: - - z_i &= \\Pi x_i + \\zeta_i, - - d_i &= x_i' \\gamma + z_i' \\delta + u_i, - - y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, - - with - - .. math:: - - \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim - \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ - 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) - - where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. - :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, - :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` - and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. - - Parameters - ---------- - n_obs : - The number of observations to simulate. - alpha : - The value of the causal parameter. - dim_x : - The number of covariates. - dim_z : - The number of instruments. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. - - References - ---------- - Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear - Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. - """ - assert dim_x >= dim_z - # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf - xx = np.random.multivariate_normal( - np.zeros(2), - np.array([[1.0, 0.6], [0.6, 1.0]]), - size=[ - n_obs, - ], - ) - epsilon = xx[:, 0] - u = xx[:, 1] - - sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - sigma, - size=[ - n_obs, - ], - ) - - I_z = np.eye(dim_z) - xi = np.random.multivariate_normal( - np.zeros(dim_z), - 0.25 * I_z, - size=[ - n_obs, - ], - ) - - beta = [1 / (k**2) for k in range(1, dim_x + 1)] - gamma = beta - delta = [1 / (k**2) for k in range(1, dim_z + 1)] - Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) - - z = np.dot(x, np.transpose(Pi)) + xi - d = np.dot(x, gamma) + np.dot(z, delta) + u - y = alpha * d + np.dot(x, beta) + epsilon - - if return_type in _array_alias: - return x, y, d, z - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)] - data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLData(data, "y", "d", x_cols, z_cols) - else: - raise ValueError("Invalid return_type.") - - -def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs): - """ - Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al. - (2021). The data generating process is defined as - - .. math:: - - Z_{ij} &= X_{ij}' \\xi_0 + V_{ij}, - - D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij}, - - Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij}, - - with - - .. math:: - - X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X - + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X, - - \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon - + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon, - - v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v - + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v, - - V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V - + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V, - - and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)` - where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries - :math:`\\Sigma_{kj} = s_X^{|j-k|}`. - Further - - .. 
math:: - - \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right), - \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right) - \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\ - s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right) - - - and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`. - - Parameters - ---------- - N : - The number of observations (first dimension). - M : - The number of observations (second dimension). - dim_X : - The number of covariates. - theta : - The value of the causal parameter. - return_type : - If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where - ``DoubleMLClusterData.data`` is a ``pd.DataFrame``. - - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s - ``(x, y, d, cluster_vars, z)``. - **kwargs - Additional keyword arguments to set non-default values for the parameters - :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`, - :math:`s_X = s_{\\varepsilon v} = 0.25`, - or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries - :math:`(\\zeta_{0})_j = 0.5^j`. - - References - ---------- - Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning, - Journal of Business & Economic Statistics, - doi: `10.1080/07350015.2021.1895815 `_, - arXiv:`1909.03489 `_. - """ - # additional parameters specifiable via kwargs - pi_10 = kwargs.get("pi_10", 1.0) - - xx = np.arange(1, dim_X + 1) - zeta_0 = kwargs.get("zeta_0", np.power(0.5, xx)) - pi_20 = kwargs.get("pi_20", np.power(0.5, xx)) - xi_0 = kwargs.get("xi_0", np.power(0.5, xx)) - - omega_X = kwargs.get("omega_X", np.array([0.25, 0.25])) - omega_epsilon = kwargs.get("omega_epsilon", np.array([0.25, 0.25])) - omega_v = kwargs.get("omega_v", np.array([0.25, 0.25])) - omega_V = kwargs.get("omega_V", np.array([0.25, 0.25])) - - s_X = kwargs.get("s_X", 0.25) - s_epsilon_v = kwargs.get("s_epsilon_v", 0.25) - - # use np.tile() and np.repeat() for repeating vectors in different styles, i.e., - # np.tile([v1, v2, v3], 2) [v1, v2, v3, v1, v2, v3] - # np.repeat([v1, v2, v3], 2) [v1, v1, v2, v2, v3, v3] - - alpha_V = np.random.normal(size=(N * M)) - alpha_V_i = np.repeat(np.random.normal(size=N), M) - alpha_V_j = np.tile(np.random.normal(size=M), N) - - cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]]) - alpha_eps_v = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - N * M, - ], - ) - alpha_eps = alpha_eps_v[:, 0] - alpha_v = alpha_eps_v[:, 1] - - alpha_eps_v_i = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - N, - ], - ) - alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M) - alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M) - - alpha_eps_v_j = np.random.multivariate_normal( - np.zeros(2), - cov_mat, - size=[ - M, - ], - ) - alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N) - alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N) - - cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)]) - alpha_X = np.random.multivariate_normal( - np.zeros(dim_X), - cov_mat, - size=[ - N * M, - ], - ) - alpha_X_i = np.repeat( - np.random.multivariate_normal( - np.zeros(dim_X), - 
cov_mat, - size=[ - N, - ], - ), - M, - axis=0, - ) - alpha_X_j = np.tile( - np.random.multivariate_normal( - np.zeros(dim_X), - cov_mat, - size=[ - M, - ], - ), - (N, 1), - ) - - # generate variables - x = (1 - omega_X[0] - omega_X[1]) * alpha_X + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j - - eps = ( - (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j - ) - - v = (1 - omega_v[0] - omega_v[1]) * alpha_v + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j - - V = (1 - omega_V[0] - omega_V[1]) * alpha_V + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j - - z = np.matmul(x, xi_0) + V - d = z * pi_10 + np.matmul(x, pi_20) + v - y = d * theta + np.matmul(x, zeta_0) + eps - - cluster_cols = ["cluster_var_i", "cluster_var_j"] - cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True) - - if return_type in _array_alias: - return x, y, d, cluster_vars.values, z - elif return_type in _data_frame_alias + _dml_cluster_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_X)] - data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1) - if return_type in _data_frame_alias: - return data - else: - return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z") - else: - raise ValueError("Invalid return_type.") - - -def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): - """ - Generates counfounded data from an interactive regression model. - - The data generating process is defined as follows (inspired by the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the propensity score as - - .. math:: - - m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A - - where - - .. math:: - - p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, - - f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). - - and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. - Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as - - .. math:: - - P(D=1|X) = p(Z). - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) - \\cdot D (Z_5 + 1) + g(Z). 
- - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be - set via the parameters ``gamma_a`` and ``beta_a``. - - The observed data is given as :math:`W = (Y, D, Z)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, - the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score and - in sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) - are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``0.0``. - gamma_a : float - Coefficient of the unobserved confounder in the propensity score. - Default is ``0.127``. - beta_a : float - Coefficient of the unobserved confounder in the outcome regression. - Default is ``0.58``. - linear : bool - If ``True``, the Z will be set to X, such that the underlying (short) models are linear/logistic. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. - """ - c = 0.0 # the confounding strength is only valid for c=0 - xi = 0.75 - dim_x = kwargs.get("dim_x", 5) - trimming_threshold = kwargs.get("trimming_threshold", 0.01) - var_eps_y = kwargs.get("var_eps_y", 1.0) - - # Specification of main regression function - def f_reg(w): - res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) - return res - - # Specification of prop score function - def f_ps(w, xi): - res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - z_tilde_5 = x[:, 4] - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - # error terms and unobserved confounder - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # Choose the features used in the models - if linear: - features_ps = x - features_reg = x - else: - features_ps = z - features_reg = z - - p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) - # compute short and long form of propensity score - m_long = p + gamma_a * a - m_short = p - # check propensity score bounds - if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): - m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) - m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) - warnings.warn( - f"Propensity score is close to 0 or 1. 
" - f"Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied" - ) - # generate treatment based on long form - u = np.random.uniform(low=0, high=1, size=n_obs) - d = 1.0 * (m_long >= u) - # add treatment heterogeneity - d1x = z[:, 4] + 1 - var_dx = np.var(d * (d1x)) - cov_adx = gamma_a * var_a - # Outcome regression - g_partial_reg = f_reg(features_reg) - # short model - g_short_d0 = g_partial_reg - g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg - g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 - # long model - g_long_d0 = g_partial_reg + beta_a * a - g_long_d1 = theta * d1x + g_partial_reg + beta_a * a - g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 - # Potential outcomes - y_0 = g_long_d0 + eps_y - y_1 = g_long_d1 + eps_y - # Realized outcome - y = d * y_1 + (1.0 - d) * y_0 - # In-sample values for confounding strength - explained_residual_variance = np.square(g_long - g_short) - residual_variance = np.square(y - g_short) - cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) - # compute the Riesz representation - treated_weight = d / np.mean(d) - untreated_weight = (1.0 - d) / np.mean(d) - # Odds ratios - propensity_ratio_long = m_long / (1.0 - m_long) - rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) - rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) - propensity_ratio_short = m_short / (1.0 - m_short) - rr_short_ate = d / m_short - (1.0 - d) / (1.0 - m_short) - rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) - cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( - 1 / (m_long * (1 - m_long)) - ) - cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) - if (beta_a == 0) | (gamma_a == 0): - rho_ate = 0.0 - rho_atte = 0.0 - else: - rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] - rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] - oracle_values = { - "g_long": g_long, - "g_short": g_short, - "m_long": m_long, - "m_short": m_short, - "gamma_a": gamma_a, - "beta_a": beta_a, - "a": a, - "y_0": y_0, - "y_1": y_1, - "z": z, - "cf_y": cf_y, - "cf_d_ate": cf_d_ate, - "cf_d_atte": cf_d_atte, - "rho_ate": rho_ate, - "rho_atte": rho_atte, - } - res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - return res_dict - - -def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): - """ - Generates counfounded data from an partially linear regression model. - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, - where :math:`\\Sigma` is a matrix with entries - :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. - - Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. - At first, define the treatment as - - .. 
math:: - - D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D - - and with :math:`\\varepsilon \\sim \\mathcal{N}(0,1)`. - Since :math:`A` is independent of :math:`X`, the long and short form of the treatment regression are given as - - .. math:: - - E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A - - E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. - - Further, generate the outcome of interest :math:`Y` as - - .. math:: - - Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon - - g(Z) &= 210 + 27.4 \\cdot Z_1 +13.7 \\cdot (Z_2 + Z_3 + Z_4) - - where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. - This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of - the conditional expectation take the following forms - - .. math:: - - \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A - - \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). - - Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. - Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). - - The observed data is given as :math:`W = (Y, D, X)`. - Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, the effect :math:`\\theta`, - the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and - the propensity score are returned in a dictionary. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``500``. - theta : float or int - Average treatment effect. - Default is ``5.0``. - cf_y : float - Percentage of the residual variation of the outcome explained by latent/confounding variable. - Default is ``0.04``. - cf_d : float - Percentage gains in the variation of the Riesz Representer generated by latent/confounding variable. - Default is ``0.04``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - - References - ---------- - Sant’Anna, P. H. and Zhao, J. (2020), - Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. - doi:`10.1016/j.jeconom.2020.06.003 `_. 
- """ - c = kwargs.get("c", 0.0) - dim_x = kwargs.get("dim_x", 4) - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - # unobserved confounder - a_bounds = (-1, 1) - a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) - var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 - - # get the required impact of the confounder on the propensity score - m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] - - def f_m(gamma_a): - rr_long = eps_d / var_eps_d - rr_short = (gamma_a * a + eps_d) / (gamma_a**2 * var_a + var_eps_d) - C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) - return np.square(C2_D / (1 + C2_D) - cf_d) - - gamma_a = minimize_scalar(f_m).x - m_long = m_short + gamma_a * a - d = m_long + eps_d - - # short and long version of g - g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] + z[:, 3]) - - var_d = np.var(d) - - def f_g(beta_a): - g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) - y_diff = eps_y + g_diff - return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) - - beta_a = minimize_scalar(f_g).x - - g_long = theta * d + g_partial_reg + beta_a * a - g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg - - y = g_long + eps_y - - oracle_values = { - "g_long": g_long, - "g_short": g_short, - "m_long": m_long, - "m_short": m_short, - "theta": theta, - "gamma_a": gamma_a, - "beta_a": beta_a, - "a": a, - "z": z, - } - - res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - - return res_dict - - -def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): - """ - Creates a simple synthetic example for heterogeneous treatment effects. - The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). - - The data is generated as - - .. math:: - - Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i - - D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, - - where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i - \\sim\\mathcal{U}[-1,1]`. - If the treatment is set to be binary, the treatment is generated as - - .. math:: - D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. - - The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support - which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. - Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending - on the dimension of :math:`x`. - - If the heterogeneity is univariate the conditional treatment effect takes the following form - - .. math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), - - whereas for the two-dimensional case the conditional treatment effect is defined as - - .. 
math:: - \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). - - Parameters - ---------- - n_obs : int - Number of observations to simulate. - Default is ``200``. - - p : int - Dimension of covariates. - Default is ``30``. - - support_size : int - Number of relevant (confounding) covariates. - Default is ``5``. - - n_x : int - Dimension of the heterogeneity. Can be either ``1`` or ``2``. - Default is ``1``. - - binary_treatment : bool - Indicates whether the treatment is binary. - Default is ``False``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``data``, ``effects``, ``treatment_effect``. - - """ - # simple input checks - assert n_x in [1, 2], "n_x must be either 1 or 2." - assert support_size <= p, "support_size must be smaller than p." - assert isinstance(binary_treatment, bool), "binary_treatment must be a boolean." - - # define treatment effects - if n_x == 1: - - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) - - else: - assert n_x == 2 - - # redefine treatment effect - def treatment_effect(x): - return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) - - # Outcome support and coefficients - support_y = np.random.choice(np.arange(p), size=support_size, replace=False) - coefs_y = np.random.uniform(0, 1, size=support_size) - # treatment support and coefficients - support_d = support_y - coefs_d = np.random.uniform(0, 0.3, size=support_size) - - # noise - epsilon = np.random.uniform(-1, 1, size=n_obs) - eta = np.random.uniform(-1, 1, size=n_obs) - - # Generate controls, covariates, treatments and outcomes - x = np.random.uniform(0, 1, size=(n_obs, p)) - # Heterogeneous treatment effects - te = treatment_effect(x) - if binary_treatment: - d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) - else: - d = np.dot(x[:, support_d], coefs_d) + eta - y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon - - # Now we build the dataset - y_df = pd.DataFrame({"y": y}) - d_df = pd.DataFrame({"d": d}) - x_df = pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=[f"X_{i}" for i in range(x.shape[1])]) - - data = pd.concat([y_df, d_df, x_df], axis=1) - res_dict = {"data": data, "effects": te, "treatment_effect": treatment_effect} - return res_dict - - -def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"): - """ - Generates data from a sample selection model (SSM). - The data generating process is defined as - - .. math:: - - y_i &= \\theta d_i + x_i' \\beta d_i + u_i, - - s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, - - d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, - - with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where - :math:`\\Sigma^2_x` is a matrix with entries - :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. - :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` - :math:`z_i \\sim \\mathcal{N}(0, 1)`, - :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, - :math:`w_i \\sim \\mathcal{N}(0, 1)`. - - - The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, - Huber and Lafférs (2023). - - Parameters - ---------- - n_obs : - The number of observations to simulate. - dim_x : - The number of covariates. - theta : - The value of the causal parameter. - mar: - Boolean. Indicates whether missingness at random holds. - return_type : - If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 
- - If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. - - If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. - - References - ---------- - Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, - Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 - """ - if mar: - sigma = np.array([[1, 0], [0, 1]]) - gamma = 0 - else: - sigma = np.array([[1, 0.8], [0.8, 1]]) - gamma = 1 - - e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T - - cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - beta = [0.4 / (k**2) for k in range(1, dim_x + 1)] - - d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) - z = np.random.randn(n_obs) - s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) - - y = np.dot(x, beta) + theta * d + e[1] - y[s == 0] = 0 - - if return_type in _array_alias: - return x, y, d, z, s - elif return_type in _data_frame_alias + _dml_data_alias: - x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] - if mar: - data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"]) - else: - data = pd.DataFrame(np.column_stack((x, y, d, z, s)), columns=x_cols + ["y", "d", "z", "s"]) - if return_type in _data_frame_alias: - return data - else: - if mar: - return DoubleMLData(data, "y", "d", x_cols, None, None, "s") - return DoubleMLData(data, "y", "d", x_cols, "z", None, "s") - else: - raise ValueError("Invalid return_type.") - - -def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): - """ - Generates data from a interactive regression (IRM) model with multiple treatment levels (based on an - underlying continous treatment). - - The data generating process is defined as follows (similar to the Monte Carlo simulation used - in Sant'Anna and Zhao (2020)). - - Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds - to the identity matrix. - Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, - where - - .. math:: - - \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) - - \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) - - \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 - - \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 - - \\tilde{Z}_5 &= X_5. - - A continuous treatment :math:`D_{\\text{cont}}` is generated as - - .. math:: - - D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, - - where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment - effect is defined as - - .. math:: - - \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. - - Based on the continous treatment, a discrete treatment :math:`D` is generated as with a baseline level of - :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels - is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. - - The potential outcomes are defined as - - .. math:: - - Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y - - Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), - - where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as - - .. 
math:: - - Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. - - The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. - - Parameters - ---------- - n_obs : int - The number of observations to simulate. - Default is ``200``. - - n_levels : int - The number of treatment levels. - Default is ``3``. - - linear : bool - Indicates whether the true underlying regression is linear. - Default is ``False``. - - random_state : int - Random seed for reproducibility. - Default is ``42``. - - Returns - ------- - res_dict : dictionary - Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. - The oracle values contain the continuous treatment, the level bounds, the potential level, ITE - and the potential outcome without treatment. - - """ - if random_state is not None: - np.random.seed(random_state) - xi = kwargs.get("xi", 0.3) - c = kwargs.get("c", 0.0) - dim_x = kwargs.get("dim_x", 5) - - if not isinstance(n_levels, int): - raise ValueError("n_levels must be an integer.") - if n_levels < 2: - raise ValueError("n_levels must be at least 2.") - - # observed covariates - cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) - x = np.random.multivariate_normal( - np.zeros(dim_x), - cov_mat, - size=[ - n_obs, - ], - ) - - def f_reg(w): - res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) - return res - - def f_treatment(w, xi): - res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) - return res - - def treatment_effect(d, scale=15): - return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 - - z_tilde_1 = np.exp(0.5 * x[:, 0]) - z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) - z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 - z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 - - z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) - z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) - - # error terms - var_eps_y = 5 - eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) - var_eps_d = 1 - eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) - - if linear: - g = f_reg(x) - m = f_treatment(x, xi) - else: - assert not linear - g = f_reg(z) - m = f_treatment(z, xi) - - cont_d = m + eps_d - level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) - potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 - eta = np.random.uniform(0, 1, size=n_obs) - d = 1.0 * (eta >= 1 / n_levels) * potential_level - - ite = treatment_effect(cont_d) - y0 = g + eps_y - # only treated for d > 0 compared to the baseline - y = ite * (d > 0) + y0 - - oracle_values = { - "cont_d": cont_d, - "level_bounds": level_bounds, - "potential_level": potential_level, - "ite": ite, - "y0": y0, - } - - resul_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} - - return resul_dict diff --git a/doubleml/datasets/__init__.py b/doubleml/datasets/__init__.py new file mode 100644 index 000000000..6a64a5c8d --- /dev/null +++ b/doubleml/datasets/__init__.py @@ -0,0 +1,13 @@ +""" +The :mod:`doubleml.datasets` module implements data generating processes for double machine learning simulations and provides access to real datasets. 
+""" + +# Import fetch functions +from .fetch_401K import fetch_401K +from .fetch_bonus import fetch_bonus + + +__all__ = [ + "fetch_401K", + "fetch_bonus", +] diff --git a/doubleml/datasets/fetch_401K.py b/doubleml/datasets/fetch_401K.py new file mode 100644 index 000000000..05a97fe73 --- /dev/null +++ b/doubleml/datasets/fetch_401K.py @@ -0,0 +1,65 @@ +""" +Data set on financial wealth and 401(k) plan participation. +""" + +import pandas as pd +from doubleml import DoubleMLData + + +def _get_array_alias(): + return ["array", "np.array", "np.ndarray"] + + +def _get_data_frame_alias(): + return ["DataFrame", "pd.DataFrame", "pandas.DataFrame"] + + +def _get_dml_data_alias(): + return ["DoubleMLData"] + + +def fetch_401K(return_type="DoubleMLData", polynomial_features=False): + """ + Data set on financial wealth and 401(k) plan participation. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Abadie, A. (2003), Semiparametric instrumental variable estimation of treatment response models. Journal of + Econometrics, 113(2): 231-263. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 `_. + """ + _array_alias = _get_array_alias() + _data_frame_alias = _get_data_frame_alias() + _dml_data_alias = _get_dml_data_alias() + + url = "https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta" + raw_data = pd.read_stata(url) + + y_col = "net_tfa" + d_cols = ["e401"] + x_cols = ["age", "inc", "educ", "fsize", "marr", "twoearn", "db", "pira", "hown"] + + data = raw_data.copy() + + if polynomial_features: + raise NotImplementedError("polynomial_features os not implemented yet for fetch_401K.") + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/datasets/fetch_bonus.py b/doubleml/datasets/fetch_bonus.py new file mode 100644 index 000000000..155100c31 --- /dev/null +++ b/doubleml/datasets/fetch_bonus.py @@ -0,0 +1,98 @@ +""" +Data set on the Pennsylvania Reemployment Bonus experiment. +""" + +import numpy as np +import pandas as pd +from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures +from doubleml import DoubleMLData + + +def _get_array_alias(): + return ["array", "np.array", "np.ndarray"] + + +def _get_data_frame_alias(): + return ["DataFrame", "pd.DataFrame", "pandas.DataFrame"] + + +def _get_dml_data_alias(): + return ["DoubleMLData"] + + +def fetch_bonus(return_type="DoubleMLData", polynomial_features=False): + """ + Data set on the Pennsylvania Reemployment Bonus experiment. + + Parameters + ---------- + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + polynomial_features : + If ``True`` polynomial features are added (see replication files of Chernozhukov et al. (2018)). + + References + ---------- + Bilias Y. 
(2000), Sequential Testing of Duration Data: The Case of Pennsylvania 'Reemployment Bonus' Experiment. + Journal of Applied Econometrics, 15(6): 575-594. + + Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), + Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68. + doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_. + """ + _array_alias = _get_array_alias() + _data_frame_alias = _get_data_frame_alias() + _dml_data_alias = _get_dml_data_alias() + + url = "https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat" + raw_data = pd.read_csv(url, sep=r"\s+") + + ind = (raw_data["tg"] == 0) | (raw_data["tg"] == 4) + data = raw_data.copy()[ind] + data.reset_index(inplace=True) + data["tg"] = data["tg"].replace(4, 1) + data["inuidur1"] = np.log(data["inuidur1"]) + + # variable dep as factor (dummy encoding) + dummy_enc = OneHotEncoder(drop="first", categories="auto").fit(data.loc[:, ["dep"]]) + xx = dummy_enc.transform(data.loc[:, ["dep"]]).toarray() + data["dep1"] = xx[:, 0] + data["dep2"] = xx[:, 1] + + y_col = "inuidur1" + d_cols = ["tg"] + x_cols = [ + "female", + "black", + "othrace", + "dep1", + "dep2", + "q2", + "q3", + "q4", + "q5", + "q6", + "agelt35", + "agegt54", + "durable", + "lusd", + "husd", + ] + + if polynomial_features: + poly = PolynomialFeatures(2, include_bias=False) + data_transf = poly.fit_transform(data[x_cols]) + x_cols = list(poly.get_feature_names_out(x_cols)) + + data_transf = pd.DataFrame(data_transf, columns=x_cols) + data = pd.concat((data[[y_col] + d_cols], data_transf), axis=1, sort=False) + + if return_type in _data_frame_alias + _dml_data_alias: + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, y_col, d_cols, x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 7a671993c..170535ea4 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -63,10 +63,9 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Default is ``True``. Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_did_SZ2020 + >>> from doubleml.did.datasets import make_did_SZ2020 >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(42) >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index ab2af5b95..7f33210fe 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -63,10 +63,9 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Default is ``True``.
Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_did_SZ2020 + >>> from doubleml.did.datasets import make_did_SZ2020 >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(42) >>> ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 764865a43..fe4cec5de 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1167,10 +1167,9 @@ def evaluate_learners(self, learners=None, metric=_rmse): Examples -------- - >>> import numpy as np - >>> import doubleml as dml + >>> import numpy as np >>> import doubleml as dml >>> from sklearn.metrics import mean_absolute_error - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) @@ -1284,10 +1283,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): self : object Examples - -------- - >>> import numpy as np + -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 8099342aa..2960e90d3 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -673,7 +673,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index d2aeaced6..57347dce9 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -82,7 +82,7 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/datasets/__init__.py b/doubleml/irm/datasets/__init__.py new file mode 100644 index 000000000..05f951343 --- /dev/null +++ b/doubleml/irm/datasets/__init__.py @@ -0,0 +1,20 @@ +""" +The :mod:`doubleml.irm.datasets` module implements data generating processes for interactive regression models. 
+""" + +from .dgp_confounded_irm_data import make_confounded_irm_data +from .dgp_heterogeneous_data import make_heterogeneous_data +from .dgp_iivm_data import make_iivm_data +from .dgp_irm_data import make_irm_data +from .dgp_irm_data_discrete_treatments import make_irm_data_discrete_treatments +from .dgp_ssm_data import make_ssm_data + + +__all__ = [ + "make_confounded_irm_data", + "make_heterogeneous_data", + "make_iivm_data", + "make_irm_data", + "make_irm_data_discrete_treatments", + "make_ssm_data", +] diff --git a/doubleml/irm/datasets/dgp_confounded_irm_data.py b/doubleml/irm/datasets/dgp_confounded_irm_data.py new file mode 100644 index 000000000..2452e896b --- /dev/null +++ b/doubleml/irm/datasets/dgp_confounded_irm_data.py @@ -0,0 +1,232 @@ +import numpy as np +import warnings +from scipy.linalg import toeplitz + + +def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs): + """ + Generates counfounded data from an interactive regression model. + + The data generating process is defined as follows (inspired by the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). + + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + At first, define the propensity score as + + .. math:: + + m(X, A) = P(D=1|X,A) = p(Z) + \\gamma_A \\cdot A + + where + + .. math:: + + p(Z) &= \\frac{\\exp(f_{ps}(Z))}{1 + \\exp(f_{ps}(Z))}, + + f_{ps}(Z) &= 0.75 \\cdot (-Z_1 + 0.1 \\cdot Z_2 -0.25 \\cdot Z_3 - 0.1 \\cdot Z_4). + + and generate the treatment :math:`D = 1\\{m(X, A) \\ge U\\}` with :math:`U \\sim \\mathcal{U}[0, 1]`. + Since :math:`A` is independent of :math:`X`, the short form of the propensity score is given as + + .. math:: + + P(D=1|X) = p(Z). + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 2.5 + 0.74 \\cdot Z_1 + 0.25 \\cdot Z_2 + 0.137 \\cdot (Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. + This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D (Z_5 + 1) + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\beta_A \\frac{\\mathrm{Cov}(A, D(Z_5 + 1))}{\\mathrm{Var}(D(Z_5 + 1))}) + \\cdot D (Z_5 + 1) + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`, which can be + set via the parameters ``gamma_a`` and ``beta_a``. + + The observed data is given as :math:`W = (Y, D, Z)`. + Further, orcale values of the confounder :math:`A`, the transformed covariated :math:`Z`, + the potential outcomes of :math:`Y`, the long and short forms of the main regression and the propensity score and + in sample versions of the confounding parameters :math:`cf_d` and :math:`cf_y` (for ATE and ATTE) + are returned in a dictionary. 
+ + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``0.0``. + gamma_a : float + Coefficient of the unobserved confounder in the propensity score. + Default is ``0.127``. + beta_a : float + Coefficient of the unobserved confounder in the outcome regression. + Default is ``0.58``. + linear : bool + If ``True``, ``Z`` is set to ``X``, so that the underlying (short) models are linear/logistic. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant'Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 <https://doi.org/10.1016/j.jeconom.2020.06.003>`_. + """ + c = 0.0  # the confounding strength is only valid for c=0 + xi = 0.75 + dim_x = kwargs.get("dim_x", 5) + trimming_threshold = kwargs.get("trimming_threshold", 0.01) + var_eps_y = kwargs.get("var_eps_y", 1.0) + + # Specification of main regression function + def f_reg(w): + res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3]) + return res + + # Specification of prop score function + def f_ps(w, xi): + res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + z_tilde_5 = x[:, 4] + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5)) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + # error terms and unobserved confounder + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # Choose the features used in the models + if linear: + features_ps = x + features_reg = x + else: + features_ps = z + features_reg = z + + p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi))) + # compute short and long form of propensity score + m_long = p + gamma_a * a + m_short = p + # check propensity score bounds + if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold): + m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold) + m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold) + warnings.warn( + f"Propensity score is close to 0 or 1.
" + f"Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied" + ) + # generate treatment based on long form + u = np.random.uniform(low=0, high=1, size=n_obs) + d = 1.0 * (m_long >= u) + # add treatment heterogeneity + d1x = z[:, 4] + 1 + var_dx = np.var(d * (d1x)) + cov_adx = gamma_a * var_a + # Outcome regression + g_partial_reg = f_reg(features_reg) + # short model + g_short_d0 = g_partial_reg + g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg + g_short = d * g_short_d1 + (1.0 - d) * g_short_d0 + # long model + g_long_d0 = g_partial_reg + beta_a * a + g_long_d1 = theta * d1x + g_partial_reg + beta_a * a + g_long = d * g_long_d1 + (1.0 - d) * g_long_d0 + # Potential outcomes + y_0 = g_long_d0 + eps_y + y_1 = g_long_d1 + eps_y + # Realized outcome + y = d * y_1 + (1.0 - d) * y_0 + # In-sample values for confounding strength + explained_residual_variance = np.square(g_long - g_short) + residual_variance = np.square(y - g_short) + cf_y = np.mean(explained_residual_variance) / np.mean(residual_variance) + # compute the Riesz representation + treated_weight = d / np.mean(d) + untreated_weight = (1.0 - d) / np.mean(d) + # Odds ratios + propensity_ratio_long = m_long / (1.0 - m_long) + rr_long_ate = d / m_long - (1.0 - d) / (1.0 - m_long) + rr_long_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_long) + propensity_ratio_short = m_short / (1.0 - m_short) + rr_short_ate = d / m_short - (1.0 - d) / (1.0 - m_short) + rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short) + cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean( + 1 / (m_long * (1 - m_long)) + ) + cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long) + if (beta_a == 0) | (gamma_a == 0): + rho_ate = 0.0 + rho_atte = 0.0 + else: + rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1] + rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1] + oracle_values = { + "g_long": g_long, + "g_short": g_short, + "m_long": m_long, + "m_short": m_short, + "gamma_a": gamma_a, + "beta_a": beta_a, + "a": a, + "y_0": y_0, + "y_1": y_1, + "z": z, + "cf_y": cf_y, + "cf_d_ate": cf_d_ate, + "cf_d_atte": cf_d_atte, + "rho_ate": rho_ate, + "rho_atte": rho_atte, + } + res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + return res_dict diff --git a/doubleml/irm/datasets/dgp_heterogeneous_data.py b/doubleml/irm/datasets/dgp_heterogeneous_data.py new file mode 100644 index 000000000..0f1a1b15c --- /dev/null +++ b/doubleml/irm/datasets/dgp_heterogeneous_data.py @@ -0,0 +1,114 @@ +import numpy as np +import pandas as pd + + +def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): + """ + Creates a simple synthetic example for heterogeneous treatment effects. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). + + The data is generated as + + .. math:: + + Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + + D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + + where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i + \\sim\\mathcal{U}[-1,1]`. + If the treatment is set to be binary, the treatment is generated as + + .. math:: + D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. 
+ + The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support, + whose values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. + Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending + on the dimension of :math:`x`. + + If the heterogeneity is univariate, the conditional treatment effect takes the following form + + .. math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), + + whereas for the two-dimensional case the conditional treatment effect is defined as + + .. math:: + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). + + Parameters + ---------- + n_obs : int + Number of observations to simulate. + Default is ``200``. + + p : int + Dimension of covariates. + Default is ``30``. + + support_size : int + Number of relevant (confounding) covariates. + Default is ``5``. + + n_x : int + Dimension of the heterogeneity. Can be either ``1`` or ``2``. + Default is ``1``. + + binary_treatment : bool + Indicates whether the treatment is binary. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``data``, ``effects``, ``treatment_effect``. + + """ + # simple input checks + assert n_x in [1, 2], "n_x must be either 1 or 2." + assert support_size <= p, "support_size must not exceed p." + assert isinstance(binary_treatment, bool), "binary_treatment must be a boolean." + + # define treatment effects + if n_x == 1: + + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) + + else: + assert n_x == 2 + + # redefine treatment effect + def treatment_effect(x): + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) + + # Outcome support and coefficients + support_y = np.random.choice(np.arange(p), size=support_size, replace=False) + coefs_y = np.random.uniform(0, 1, size=support_size) + # treatment support and coefficients + support_d = support_y + coefs_d = np.random.uniform(0, 0.3, size=support_size) + + # noise + epsilon = np.random.uniform(-1, 1, size=n_obs) + eta = np.random.uniform(-1, 1, size=n_obs) + + # Generate controls, covariates, treatments and outcomes + x = np.random.uniform(0, 1, size=(n_obs, p)) + # Heterogeneous treatment effects + te = treatment_effect(x) + if binary_treatment: + d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) + else: + d = np.dot(x[:, support_d], coefs_d) + eta + y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon + + # Now we build the dataset + y_df = pd.DataFrame({"y": y}) + d_df = pd.DataFrame({"d": d}) + x_df = pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=[f"X_{i}" for i in range(x.shape[1])]) + + data = pd.concat([y_df, d_df, x_df], axis=1) + res_dict = {"data": data, "effects": te, "treatment_effect": treatment_effect} + return res_dict diff --git a/doubleml/irm/datasets/dgp_iivm_data.py b/doubleml/irm/datasets/dgp_iivm_data.py new file mode 100644 index 000000000..e8c1130f2 --- /dev/null +++ b/doubleml/irm/datasets/dgp_iivm_data.py @@ -0,0 +1,102 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="DoubleMLData"): + """ + Generates data from an interactive IV regression
(IIVM) model. + The data generating process is defined as + + .. math:: + + d_i &= 1\\left\\lbrace \\alpha_x Z + v_i > 0 \\right\\rbrace, + + y_i &= \\theta d_i + x_i' \\beta + u_i, + + with :math:`Z \\sim \\text{Bernoulli}(0.5)` and + + .. math:: + + \\left(\\begin{matrix} u_i \\\\ v_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.3 \\\\ 0.3 & 1 \\end{matrix} \\right) \\right). + + The covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`\\beta` is a `dim_x`-vector with entries + :math:`\\beta_j=\\frac{1}{j^2}`. + + The data generating process is inspired by a process used in the simulation experiment of Farbmacher, Guber and + Klaaßen (2020). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + alpha_x : + The value of the parameter :math:`\\alpha_x`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Farbmacher, H., Guber, R. and Klaaßen, S. (2020). Instrument Validity Tests with Causal Forests. MEA Discussion + Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201. + """ + # inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201 + xx = np.random.multivariate_normal( + np.zeros(2), + np.array([[1.0, 0.3], [0.3, 1.0]]), + size=[ + n_obs, + ], + ) + u = xx[:, 0] + v = xx[:, 1] + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + + z = np.random.binomial( + p=0.5, + n=1, + size=[ + n_obs, + ], + ) + d = 1.0 * (alpha_x * z + v > 0) + y = d * theta + np.dot(x, beta) + u + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, "z") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/datasets/dgp_irm_data.py b/doubleml/irm/datasets/dgp_irm_data.py new file mode 100644 index 000000000..973902ec6 --- /dev/null +++ b/doubleml/irm/datasets/dgp_irm_data.py @@ -0,0 +1,103 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type="DoubleMLData"): + """ + Generates data from an interactive regression (IRM) model. + The data generating process is defined as + + ..
math:: + + d_i &= 1\\left\\lbrace \\frac{\\exp(c_d x_i' \\beta)}{1+\\exp(c_d x_i' \\beta)} > v_i \\right\\rbrace, & &v_i + \\sim \\mathcal{U}(0,1), + + y_i &= \\theta d_i + c_y x_i' \\beta d_i + \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1), + + with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}` and the constants :math:`c_y` and + :math:`c_d` are given by + + .. math:: + + c_y = \\sqrt{\\frac{R_y^2}{(1-R_y^2) \\beta' \\Sigma \\beta}}, \\qquad c_d = + \\sqrt{\\frac{(\\pi^2 /3) R_d^2}{(1-R_d^2) \\beta' \\Sigma \\beta}}. + + The data generating process is inspired by a process used in the simulation experiment (see Appendix P) of Belloni + et al. (2017). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + R2_d : + The value of the parameter :math:`R_d^2`. + R2_y : + The value of the parameter :math:`R_y^2`. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. + + References + ---------- + Belloni, A., Chernozhukov, V., Fernández‐Val, I. and Hansen, C. (2017). Program Evaluation and Causal Inference With + High‐Dimensional Data. Econometrica, 85: 233-298. + """ + # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see supplement + v = np.random.uniform( + size=[ + n_obs, + ] + ) + zeta = np.random.standard_normal( + size=[ + n_obs, + ] + ) + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + b_sigma_b = np.dot(np.dot(cov_mat, beta), beta) + c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b)) + c_d = np.sqrt(np.pi**2 / 3.0 * R2_d / ((1 - R2_d) * b_sigma_b)) + + xx = np.exp(np.dot(x, np.multiply(beta, c_d))) + d = 1.0 * ((xx / (1 + xx)) > v) + + y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta + + if return_type in _array_alias: + return x, y, d + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py b/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py new file mode 100644 index 000000000..af621c9da --- /dev/null +++ b/doubleml/irm/datasets/dgp_irm_data_discrete_treatments.py @@ -0,0 +1,164 @@ +import numpy as np +from scipy.linalg import toeplitz + + +def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs): + """ + Generates data from an interactive regression (IRM) model with multiple treatment levels (based on an + underlying continuous treatment). + + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)).
+ + Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` corresponds + to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2 + + \\tilde{Z}_5 &= X_5. + + A continuous treatment :math:`D_{\\text{cont}}` is generated as + + .. math:: + + D_{\\text{cont}} = \\xi (-Z_1 + 0.5 Z_2 - 0.25 Z_3 - 0.1 Z_4) + \\varepsilon_D, + + where :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)` and :math:`\\xi=0.3`. The corresponding treatment + effect is defined as + + .. math:: + + \\theta (d) = 0.1 \\exp(d) + 10 \\sin(0.7 d) + 2 d - 0.2 d^2. + + Based on the continuous treatment, a discrete treatment :math:`D` is generated with a baseline level of + :math:`D=0` and additional levels based on the quantiles of :math:`D_{\\text{cont}}`. The number of levels + is defined by :math:`n_{\\text{levels}}`. Each level is chosen to have the same probability of being selected. + + The potential outcomes are defined as + + .. math:: + + Y(0) &= 210 + 27.4 Z_1 + 13.7 (Z_2 + Z_3 + Z_4) + \\varepsilon_Y + + Y(1) &= \\theta (D_{\\text{cont}}) 1\\{D_{\\text{cont}} > 0\\} + Y(0), + + where :math:`\\varepsilon_Y \\sim \\mathcal{N}(0,5)`. Further, the observed outcome is defined as + + .. math:: + + Y = Y(1) 1\\{D > 0\\} + Y(0) 1\\{D = 0\\}. + + The data is returned as a dictionary with the entries ``x``, ``y``, ``d`` and ``oracle_values``. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``200``. + + n_levels : int + The number of treatment levels. + Default is ``3``. + + linear : bool + Indicates whether the true underlying regression is linear. + Default is ``False``. + + random_state : int + Random seed for reproducibility. + Default is ``None``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + The oracle values contain the continuous treatment, the level bounds, the potential level, ITE + and the potential outcome without treatment.
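A minimal usage sketch of this multi-level DGP (import path per this PR; the oracle keys are those returned by the function):

```python
import numpy as np
from doubleml.irm.datasets import make_irm_data_discrete_treatments  # path per this PR

data = make_irm_data_discrete_treatments(n_obs=200, n_levels=3, random_state=42)
d = data["d"]
oracle = data["oracle_values"]
# treatment takes the baseline level 0 plus the levels 1, ..., n_levels
print(np.unique(d), oracle["level_bounds"])
```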
+ + """ + if random_state is not None: + np.random.seed(random_state) + xi = kwargs.get("xi", 0.3) + c = kwargs.get("c", 0.0) + dim_x = kwargs.get("dim_x", 5) + + if not isinstance(n_levels, int): + raise ValueError("n_levels must be an integer.") + if n_levels < 2: + raise ValueError("n_levels must be at least 2.") + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + def f_reg(w): + res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3]) + return res + + def f_treatment(w, xi): + res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3]) + return res + + def treatment_effect(d, scale=15): + return scale * (1 / (1 + np.exp(-d - 1.2 * np.cos(d)))) - 2 + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + if linear: + g = f_reg(x) + m = f_treatment(x, xi) + else: + assert not linear + g = f_reg(z) + m = f_treatment(z, xi) + + cont_d = m + eps_d + level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1)) + potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1 + eta = np.random.uniform(0, 1, size=n_obs) + d = 1.0 * (eta >= 1 / n_levels) * potential_level + + ite = treatment_effect(cont_d) + y0 = g + eps_y + # only treated for d > 0 compared to the baseline + y = ite * (d > 0) + y0 + + oracle_values = { + "cont_d": cont_d, + "level_bounds": level_bounds, + "potential_level": potential_level, + "ite": ite, + "y0": y0, + } + + resul_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + + return resul_dict diff --git a/doubleml/irm/datasets/dgp_ssm_data.py b/doubleml/irm/datasets/dgp_ssm_data.py new file mode 100644 index 000000000..6a6a5bee8 --- /dev/null +++ b/doubleml/irm/datasets/dgp_ssm_data.py @@ -0,0 +1,102 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"): + """ + Generates data from a sample selection model (SSM). + The data generating process is defined as + + .. math:: + + y_i &= \\theta d_i + x_i' \\beta d_i + u_i, + + s_i &= 1\\left\\lbrace d_i + \\gamma z_i + x_i' \\beta + v_i > 0 \\right\\rbrace, + + d_i &= 1\\left\\lbrace x_i' \\beta + w_i > 0 \\right\\rbrace, + + with Y being observed if :math:`s_i = 1` and covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma^2_x)`, where + :math:`\\Sigma^2_x` is a matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}`. + :math:`\\beta` is a `dim_x`-vector with entries :math:`\\beta_j=\\frac{0.4}{j^2}` + :math:`z_i \\sim \\mathcal{N}(0, 1)`, + :math:`(u_i,v_i) \\sim \\mathcal{N}(0, \\Sigma^2_{u,v})`, + :math:`w_i \\sim \\mathcal{N}(0, 1)`. 
+ + + The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, + Huber and Lafférs (2023). + + Parameters + ---------- + n_obs : + The number of observations to simulate. + dim_x : + The number of covariates. + theta : + The value of the causal parameter. + mar: + Boolean. Indicates whether missingness at random holds. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z, s)``. + + References + ---------- + Michela Bia, Martin Huber & Lukáš Lafférs (2023) Double Machine Learning for Sample Selection Models, + Journal of Business & Economic Statistics, DOI: 10.1080/07350015.2023.2271071 + """ + if mar: + sigma = np.array([[1, 0], [0, 1]]) + gamma = 0 + else: + sigma = np.array([[1, 0.8], [0.8, 1]]) + gamma = 1 + + e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T + + cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + beta = [0.4 / (k**2) for k in range(1, dim_x + 1)] + + d = np.where(np.dot(x, beta) + np.random.randn(n_obs) > 0, 1, 0) + z = np.random.randn(n_obs) + s = np.where(np.dot(x, beta) + d + gamma * z + e[0] > 0, 1, 0) + + y = np.dot(x, beta) + theta * d + e[1] + y[s == 0] = 0 + + if return_type in _array_alias: + return x, y, d, z, s + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + if mar: + data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"]) + else: + data = pd.DataFrame(np.column_stack((x, y, d, z, s)), columns=x_cols + ["y", "d", "z", "s"]) + if return_type in _data_frame_alias: + return data + else: + if mar: + return DoubleMLData(data, "y", "d", x_cols, None, None, "s") + return DoubleMLData(data, "y", "d", x_cols, "z", None, "s") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index a43c0a035..70c09cdee 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -80,7 +80,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_iivm_data + >>> from doubleml.irm.datasets import make_iivm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 9bf5ed35f..10f6377c7 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -84,7 +84,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index c98e8fa2d..f46fb38c0 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -83,7 +83,7 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import 
doubleml as dml - >>> from doubleml.datasets import make_iivm_data + >>> from doubleml.irm.datasets import make_iivm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index f64dc4719..d0425845b 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -90,7 +90,7 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index 68b91a9a7..a2c803a34 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -72,7 +72,7 @@ class DoubleMLQTE: -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_irm_data + >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestClassifier >>> np.random.seed(3141) >>> ml_g = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=10, min_samples_leaf=2) @@ -499,7 +499,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): -------- >>> import numpy as np >>> import doubleml as dml - >>> from doubleml.datasets import make_plr_CCDDHNR2018 + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 >>> from sklearn.ensemble import RandomForestRegressor >>> from sklearn.base import clone >>> np.random.seed(3141) diff --git a/doubleml/irm/tests/conftest.py b/doubleml/irm/tests/conftest.py index 1cf1d5250..0a3d4db8a 100644 --- a/doubleml/irm/tests/conftest.py +++ b/doubleml/irm/tests/conftest.py @@ -4,7 +4,7 @@ from scipy.linalg import toeplitz from sklearn.datasets import make_spd_matrix -from doubleml.datasets import make_iivm_data, make_irm_data +from doubleml.irm.datasets import make_iivm_data, make_irm_data def _g(x): diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index df4ec284f..7558b7c17 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index cfb6e93b5..e643efcaf 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_iivm_data, make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_irm_data_discrete_treatments n = 100 data_apo = make_irm_data_discrete_treatments(n_obs=n) diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py index 2bbe50e81..246ef0217 100644 --- 
a/doubleml/irm/tests/test_apo_external_predictions.py +++ b/doubleml/irm/tests/test_apo_external_predictions.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLAPO, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 746cb63c9..55a48ced8 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py index 06fdc3085..f9cfc10c8 100644 --- a/doubleml/irm/tests/test_apos_classfier.py +++ b/doubleml/irm/tests/test_apos_classfier.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index c309b7e24..f1c9b3d6e 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml import DoubleMLAPOS, DoubleMLData -from doubleml.datasets import make_iivm_data, make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_iivm_data, make_irm_data_discrete_treatments n = 100 data = make_irm_data_discrete_treatments(n_obs=n) diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py index 9e97de072..ed4323adf 100644 --- a/doubleml/irm/tests/test_apos_external_predictions.py +++ b/doubleml/irm/tests/test_apos_external_predictions.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLAPOS, DoubleMLData -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py index ea612decf..6d0a7f653 100644 --- a/doubleml/irm/tests/test_apos_weighted_scores.py +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -6,7 +6,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data_discrete_treatments +from doubleml.irm.datasets import make_irm_data_discrete_treatments @pytest.fixture( diff --git a/doubleml/irm/tests/test_iivm_external_predictions.py b/doubleml/irm/tests/test_iivm_external_predictions.py index 7f4626e95..d71d2bb51 100644 --- a/doubleml/irm/tests/test_iivm_external_predictions.py +++ 
b/doubleml/irm/tests/test_iivm_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLData, DoubleMLIIVM -from doubleml.datasets import make_iivm_data +from doubleml.irm.datasets import make_iivm_data from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py index f99f2253b..856c7f59d 100644 --- a/doubleml/irm/tests/test_irm.py +++ b/doubleml/irm/tests/test_irm.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py index dabf6c0eb..5d0412d5f 100644 --- a/doubleml/irm/tests/test_irm_external_predictions.py +++ b/doubleml/irm/tests/test_irm_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLData, DoubleMLIRM -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor diff --git a/doubleml/irm/tests/test_lpq_external_predictions.py b/doubleml/irm/tests/test_lpq_external_predictions.py index 66f2ece6e..48cb42f57 100644 --- a/doubleml/irm/tests/test_lpq_external_predictions.py +++ b/doubleml/irm/tests/test_lpq_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLData, DoubleMLLPQ -from doubleml.datasets import make_iivm_data +from doubleml.irm.datasets import make_iivm_data from doubleml.utils import DMLDummyClassifier from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_pq_external_predictions.py b/doubleml/irm/tests/test_pq_external_predictions.py index 28f8ec660..9674c4648 100644 --- a/doubleml/irm/tests/test_pq_external_predictions.py +++ b/doubleml/irm/tests/test_pq_external_predictions.py @@ -5,7 +5,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLData, DoubleMLPQ -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from doubleml.utils import DMLDummyClassifier from ...tests._utils import draw_smpls diff --git a/doubleml/irm/tests/test_qte.py b/doubleml/irm/tests/test_qte.py index 0557c85b8..7fcbeec23 100644 --- a/doubleml/irm/tests/test_qte.py +++ b/doubleml/irm/tests/test_qte.py @@ -8,7 +8,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data from ...tests._utils import confint_manual, draw_smpls from ...utils._estimation import _default_kde diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py index 9f94f5d4a..f4e951107 100644 --- a/doubleml/irm/tests/test_qte_exceptions.py +++ b/doubleml/irm/tests/test_qte_exceptions.py @@ -6,7 +6,7 @@ from doubleml import DoubleMLData, DoubleMLQTE from doubleml.data.base_data import DoubleMLBaseData -from doubleml.datasets import make_irm_data +from doubleml.irm.datasets import make_irm_data np.random.seed(42) n = 100 diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py 
index 6ff276e32..50b082ec5 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -6,7 +6,7 @@ from doubleml import DoubleMLSSM from doubleml.data.base_data import DoubleMLBaseData -from doubleml.datasets import make_ssm_data +from doubleml.irm.datasets import make_ssm_data np.random.seed(3141) n = 100 diff --git a/doubleml/plm/datasets/__init__.py b/doubleml/plm/datasets/__init__.py new file mode 100644 index 000000000..f8928902e --- /dev/null +++ b/doubleml/plm/datasets/__init__.py @@ -0,0 +1,20 @@ +""" +The :mod:`doubleml.plm.datasets` module implements data generating processes for partially linear models. +""" + +from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018 +from .dgp_plr_turrell2018 import make_plr_turrell2018 +from .dgp_confounded_plr_data import make_confounded_plr_data +from .dgp_pliv_CHS2015 import make_pliv_CHS2015 +from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021 +from ._make_pliv_data import _make_pliv_data + + +__all__ = [ + "make_plr_CCDDHNR2018", + "make_plr_turrell2018", + "make_confounded_plr_data", + "make_pliv_CHS2015", + "make_pliv_multiway_cluster_CKMS2021", + "_make_pliv_data", +] diff --git a/doubleml/plm/datasets/_make_pliv_data.py b/doubleml/plm/datasets/_make_pliv_data.py new file mode 100644 index 000000000..deb7cc539 --- /dev/null +++ b/doubleml/plm/datasets/_make_pliv_data.py @@ -0,0 +1,70 @@ +""" +Helper function for partially linear IV data generation. +""" + +import numpy as np +import pandas as pd +from sklearn.datasets import make_spd_matrix + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias + +_array_alias = _get_array_alias() +_data_frame_alias = _get_data_frame_alias() +_dml_data_alias = _get_dml_data_alias() + + +def _g(x): + return np.power(np.sin(x), 2) + + +def _m(x, nu=0.0, gamma=1.0): + return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu)) + + +def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type="DoubleMLData"): + b = [1 / k for k in range(1, dim_x + 1)] + sigma = make_spd_matrix(dim_x) + + x = np.random.multivariate_normal( + np.zeros(dim_x), + sigma, + size=[ + n_obs, + ], + ) + G = _g(np.dot(x, b)) + # instrument + z = _m(np.dot(x, b)) + np.random.standard_normal( + size=[ + n_obs, + ] + ) + # treatment + M = _m(gamma_z * z + np.dot(x, b)) + d = M + np.random.standard_normal( + size=[ + n_obs, + ] + ) + y = ( + np.dot(theta, d) + + G + + np.random.standard_normal( + size=[ + n_obs, + ] + ) + ) + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"]) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, "z") + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_confounded_plr_data.py b/doubleml/plm/datasets/dgp_confounded_plr_data.py new file mode 100644 index 000000000..794e3db12 --- /dev/null +++ b/doubleml/plm/datasets/dgp_confounded_plr_data.py @@ -0,0 +1,171 @@ +import numpy as np +from scipy.linalg import toeplitz +from scipy.optimize import minimize_scalar + + +def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwargs): + """ + Generates confounded data from a partially linear regression model.
+ + The data generating process is defined as follows (similar to the Monte Carlo simulation used + in Sant'Anna and Zhao (2020)). Let :math:`X= (X_1, X_2, X_3, X_4, X_5)^T \\sim \\mathcal{N}(0, \\Sigma)`, + where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where + + .. math:: + + \\tilde{Z}_1 &= \\exp(0.5 \\cdot X_1) + + \\tilde{Z}_2 &= 10 + X_2/(1 + \\exp(X_1)) + + \\tilde{Z}_3 &= (0.6 + X_1 \\cdot X_3 / 25)^3 + + \\tilde{Z}_4 &= (20 + X_2 + X_4)^2. + + Additionally, generate a confounder :math:`A \\sim \\mathcal{U}[-1, 1]`. + First, define the treatment as + + .. math:: + + D = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + \\varepsilon_D + + with :math:`\\varepsilon_D \\sim \\mathcal{N}(0,1)`. + Since :math:`A` is independent of :math:`X`, the long and short forms of the treatment regression are given as + + .. math:: + + E[D|X,A] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4 + \\gamma_A \\cdot A + + E[D|X] = -Z_1 + 0.5 \\cdot Z_2 - 0.25 \\cdot Z_3 - 0.1 \\cdot Z_4. + + Further, generate the outcome of interest :math:`Y` as + + .. math:: + + Y &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + \\varepsilon + + g(Z) &= 210 + 27.4 \\cdot Z_1 + 13.7 \\cdot (Z_2 + Z_3 + Z_4) + + where :math:`\\varepsilon \\sim \\mathcal{N}(0,5)`. + This implies an average treatment effect of :math:`\\theta`. Additionally, the long and short forms of + the conditional expectation take the following forms + + .. math:: + + \\mathbb{E}[Y|D, X, A] &= \\theta \\cdot D + g(Z) + \\beta_A \\cdot A + + \\mathbb{E}[Y|D, X] &= (\\theta + \\gamma_A\\beta_A \\frac{\\mathrm{Var}(A)}{\\mathrm{Var}(D)}) \\cdot D + g(Z). + + Consequently, the strength of confounding is determined via :math:`\\gamma_A` and :math:`\\beta_A`. + Both are chosen to obtain the desired confounding of the outcome and Riesz Representer (in sample). + + The observed data is given as :math:`W = (Y, D, X)`. + Further, oracle values of the confounder :math:`A`, the transformed covariates :math:`Z`, the effect :math:`\\theta`, + the coefficients :math:`\\gamma_a`, :math:`\\beta_a`, the long and short forms of the main regression and + the propensity score are returned in a dictionary. + + Parameters + ---------- + n_obs : int + The number of observations to simulate. + Default is ``500``. + theta : float or int + Average treatment effect. + Default is ``5.0``. + cf_y : float + Percentage of the residual variation of the outcome explained by the latent/confounding variable. + Default is ``0.04``. + cf_d : float + Percentage gains in the variation of the Riesz Representer generated by the latent/confounding variable. + Default is ``0.04``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``x``, ``y``, ``d`` and ``oracle_values``. + + References + ---------- + Sant'Anna, P. H. and Zhao, J. (2020), + Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122. + doi:`10.1016/j.jeconom.2020.06.003 <https://doi.org/10.1016/j.jeconom.2020.06.003>`_.
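A minimal usage sketch of the confounded PLR DGP (import path per this PR; the oracle keys are those returned by the function):

```python
from doubleml.plm.datasets import make_confounded_plr_data  # path per this PR

res = make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04)
oracle = res["oracle_values"]
# gamma_a and beta_a are calibrated in sample to match the requested cf_d and cf_y
print(oracle["gamma_a"], oracle["beta_a"])
```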
+ """ + c = kwargs.get("c", 0.0) + dim_x = kwargs.get("dim_x", 4) + + # observed covariates + cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + cov_mat, + size=[ + n_obs, + ], + ) + + z_tilde_1 = np.exp(0.5 * x[:, 0]) + z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0])) + z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3 + z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2 + + z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:])) + z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0) + + # error terms + var_eps_y = 5 + eps_y = np.random.normal(loc=0, scale=np.sqrt(var_eps_y), size=n_obs) + var_eps_d = 1 + eps_d = np.random.normal(loc=0, scale=np.sqrt(var_eps_d), size=n_obs) + + # unobserved confounder + a_bounds = (-1, 1) + a = np.random.uniform(low=a_bounds[0], high=a_bounds[1], size=n_obs) + var_a = np.square(a_bounds[1] - a_bounds[0]) / 12 + + # get the required impact of the confounder on the propensity score + m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3] + + def f_m(gamma_a): + rr_long = eps_d / var_eps_d + rr_short = (gamma_a * a + eps_d) / (gamma_a**2 * var_a + var_eps_d) + C2_D = (np.mean(np.square(rr_long)) - np.mean(np.square(rr_short))) / np.mean(np.square(rr_short)) + return np.square(C2_D / (1 + C2_D) - cf_d) + + gamma_a = minimize_scalar(f_m).x + m_long = m_short + gamma_a * a + d = m_long + eps_d + + # short and long version of g + g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] + z[:, 3]) + + var_d = np.var(d) + + def f_g(beta_a): + g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d) + y_diff = eps_y + g_diff + return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y) + + beta_a = minimize_scalar(f_g).x + + g_long = theta * d + g_partial_reg + beta_a * a + g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg + + y = g_long + eps_y + + oracle_values = { + "g_long": g_long, + "g_short": g_short, + "m_long": m_long, + "m_short": m_short, + "theta": theta, + "gamma_a": gamma_a, + "beta_a": beta_a, + "a": a, + "z": z, + } + + res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values} + + return res_dict diff --git a/doubleml/plm/datasets/dgp_pliv_CHS2015.py b/doubleml/plm/datasets/dgp_pliv_CHS2015.py new file mode 100644 index 000000000..7542803a2 --- /dev/null +++ b/doubleml/plm/datasets/dgp_pliv_CHS2015.py @@ -0,0 +1,108 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLData +from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_data_alias + + +def make_pliv_CHS2015(n_obs, alpha=1.0, dim_x=200, dim_z=150, return_type="DoubleMLData"): + """ + Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015). + The data generating process is defined as + + .. math:: + + z_i &= \\Pi x_i + \\zeta_i, + + d_i &= x_i' \\gamma + z_i' \\delta + u_i, + + y_i &= \\alpha d_i + x_i' \\beta + \\varepsilon_i, + + with + + .. 
math:: + + \\left(\\begin{matrix} \\varepsilon_i \\\\ u_i \\\\ \\zeta_i \\\\ x_i \\end{matrix} \\right) \\sim + \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & 0.6 & 0 & 0 \\\\ 0.6 & 1 & 0 & 0 \\\\ + 0 & 0 & 0.25 I_{p_n^z} & 0 \\\\ 0 & 0 & 0 & \\Sigma \\end{matrix} \\right) \\right) + + where :math:`\\Sigma` is a :math:`p_n^x \\times p_n^x` matrix with entries + :math:`\\Sigma_{kj} = 0.5^{|j-k|}` and :math:`I_{p_n^z}` is the :math:`p_n^z \\times p_n^z` identity matrix. + :math:`\\beta = \\gamma` is a :math:`p_n^x`-vector with entries :math:`\\beta_j=\\frac{1}{j^2}`, + :math:`\\delta` is a :math:`p_n^z`-vector with entries :math:`\\delta_j=\\frac{1}{j^2}` + and :math:`\\Pi = (I_{p_n^z}, 0_{p_n^z \\times (p_n^x - p_n^z)})`. + + Parameters + ---------- + n_obs : + The number of observations to simulate. + alpha : + The value of the causal parameter. + dim_x : + The number of covariates. + dim_z : + The number of instruments. + return_type : + If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. + + If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. + + If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. + + References + ---------- + Chernozhukov, V., Hansen, C. and Spindler, M. (2015), Post-Selection and Post-Regularization Inference in Linear + Models with Many Controls and Instruments. American Economic Review: Papers and Proceedings, 105 (5): 486-90. + """ + assert dim_x >= dim_z + # see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf + xx = np.random.multivariate_normal( + np.zeros(2), + np.array([[1.0, 0.6], [0.6, 1.0]]), + size=[ + n_obs, + ], + ) + epsilon = xx[:, 0] + u = xx[:, 1] + + sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)]) + x = np.random.multivariate_normal( + np.zeros(dim_x), + sigma, + size=[ + n_obs, + ], + ) + + I_z = np.eye(dim_z) + xi = np.random.multivariate_normal( + np.zeros(dim_z), + 0.25 * I_z, + size=[ + n_obs, + ], + ) + + beta = [1 / (k**2) for k in range(1, dim_x + 1)] + gamma = beta + delta = [1 / (k**2) for k in range(1, dim_z + 1)] + Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z)))) + + z = np.dot(x, np.transpose(Pi)) + xi + d = np.dot(x, gamma) + np.dot(z, delta) + u + y = alpha * d + np.dot(x, beta) + epsilon + + if return_type in _array_alias: + return x, y, d, z + elif return_type in _data_frame_alias + _dml_data_alias: + x_cols = [f"X{i + 1}" for i in np.arange(dim_x)] + z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)] + data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols) + if return_type in _data_frame_alias: + return data + else: + return DoubleMLData(data, "y", "d", x_cols, z_cols) + else: + raise ValueError("Invalid return_type.") diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py new file mode 100644 index 000000000..df2b4cbe5 --- /dev/null +++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py @@ -0,0 +1,199 @@ +import numpy as np +import pandas as pd +from scipy.linalg import toeplitz + +from doubleml.data import DoubleMLClusterData +from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias + + +def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs): + """ + Generates data from a partially linear IV regression model with multiway 
diff --git a/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py
new file mode 100644
index 000000000..df2b4cbe5
--- /dev/null
+++ b/doubleml/plm/datasets/dgp_pliv_multiway_cluster_CKMS2021.py
@@ -0,0 +1,199 @@
+import numpy as np
+import pandas as pd
+from scipy.linalg import toeplitz
+
+from doubleml.data import DoubleMLClusterData
+from doubleml.utils._aliases import _array_alias, _data_frame_alias, _dml_cluster_data_alias
+
+
+def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs):
+    """
+    Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al.
+    (2021). The data generating process is defined as
+
+    .. math::
+
+        Z_{ij} &= X_{ij}' \\xi_0 + V_{ij},
+
+        D_{ij} &= Z_{ij}' \\pi_{10} + X_{ij}' \\pi_{20} + v_{ij},
+
+        Y_{ij} &= D_{ij} \\theta + X_{ij}' \\zeta_0 + \\varepsilon_{ij},
+
+    with
+
+    .. math::
+
+        X_{ij} &= (1 - \\omega_1^X - \\omega_2^X) \\alpha_{ij}^X
+        + \\omega_1^X \\alpha_{i}^X + \\omega_2^X \\alpha_{j}^X,
+
+        \\varepsilon_{ij} &= (1 - \\omega_1^\\varepsilon - \\omega_2^\\varepsilon) \\alpha_{ij}^\\varepsilon
+        + \\omega_1^\\varepsilon \\alpha_{i}^\\varepsilon + \\omega_2^\\varepsilon \\alpha_{j}^\\varepsilon,
+
+        v_{ij} &= (1 - \\omega_1^v - \\omega_2^v) \\alpha_{ij}^v
+        + \\omega_1^v \\alpha_{i}^v + \\omega_2^v \\alpha_{j}^v,
+
+        V_{ij} &= (1 - \\omega_1^V - \\omega_2^V) \\alpha_{ij}^V
+        + \\omega_1^V \\alpha_{i}^V + \\omega_2^V \\alpha_{j}^V,
+
+    and :math:`\\alpha_{ij}^X, \\alpha_{i}^X, \\alpha_{j}^X \\sim \\mathcal{N}(0, \\Sigma)`
+    where :math:`\\Sigma` is a :math:`p_x \\times p_x` matrix with entries
+    :math:`\\Sigma_{kj} = s_X^{|j-k|}`.
+    Further
+
+    .. math::
+
+        \\left(\\begin{matrix} \\alpha_{ij}^\\varepsilon \\\\ \\alpha_{ij}^v \\end{matrix}\\right),
+        \\left(\\begin{matrix} \\alpha_{i}^\\varepsilon \\\\ \\alpha_{i}^v \\end{matrix}\\right),
+        \\left(\\begin{matrix} \\alpha_{j}^\\varepsilon \\\\ \\alpha_{j}^v \\end{matrix}\\right)
+        \\sim \\mathcal{N}\\left(0, \\left(\\begin{matrix} 1 & s_{\\varepsilon v} \\\\
+        s_{\\varepsilon v} & 1 \\end{matrix} \\right) \\right)
+
+
+    and :math:`\\alpha_{ij}^V, \\alpha_{i}^V, \\alpha_{j}^V \\sim \\mathcal{N}(0, 1)`.
+
+    Parameters
+    ----------
+    N :
+        The number of observations (first dimension).
+    M :
+        The number of observations (second dimension).
+    dim_X :
+        The number of covariates.
+    theta :
+        The value of the causal parameter.
+    return_type :
+        If ``'DoubleMLClusterData'`` or ``DoubleMLClusterData``, returns a ``DoubleMLClusterData`` object where
+        ``DoubleMLClusterData.data`` is a ``pd.DataFrame``.
+
+        If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``.
+
+        If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s
+        ``(x, y, d, cluster_vars, z)``.
+    **kwargs
+        Additional keyword arguments to set non-default values for the parameters
+        :math:`\\pi_{10}=1.0`, :math:`\\omega_X = \\omega_{\\varepsilon} = \\omega_V = \\omega_v = (0.25, 0.25)`,
+        :math:`s_X = s_{\\varepsilon v} = 0.25`,
+        or the :math:`p_x`-vectors :math:`\\zeta_0 = \\pi_{20} = \\xi_0` with default entries
+        :math:`(\\zeta_{0})_j = 0.5^j`.
+
+    References
+    ----------
+    Chiang, H. D., Kato, K., Ma, Y. and Sasaki, Y. (2021), Multiway Cluster Robust Double/Debiased Machine Learning,
+    Journal of Business & Economic Statistics,
+    doi: `10.1080/07350015.2021.1895815 <https://doi.org/10.1080/07350015.2021.1895815>`_,
+    arXiv:`1909.03489 <https://arxiv.org/abs/1909.03489>`_.
+    """
+    # additional parameters specifiable via kwargs
+    pi_10 = kwargs.get("pi_10", 1.0)
+
+    xx = np.arange(1, dim_X + 1)
+    zeta_0 = kwargs.get("zeta_0", np.power(0.5, xx))
+    pi_20 = kwargs.get("pi_20", np.power(0.5, xx))
+    xi_0 = kwargs.get("xi_0", np.power(0.5, xx))
+
+    omega_X = kwargs.get("omega_X", np.array([0.25, 0.25]))
+    omega_epsilon = kwargs.get("omega_epsilon", np.array([0.25, 0.25]))
+    omega_v = kwargs.get("omega_v", np.array([0.25, 0.25]))
+    omega_V = kwargs.get("omega_V", np.array([0.25, 0.25]))
+
+    s_X = kwargs.get("s_X", 0.25)
+    s_epsilon_v = kwargs.get("s_epsilon_v", 0.25)
+
+    # use np.tile() and np.repeat() for repeating vectors in different styles, i.e.,
+    # np.tile([v1, v2, v3], 2) -> [v1, v2, v3, v1, v2, v3]
+    # np.repeat([v1, v2, v3], 2) -> [v1, v1, v2, v2, v3, v3]
+
+    alpha_V = np.random.normal(size=(N * M))
+    alpha_V_i = np.repeat(np.random.normal(size=N), M)
+    alpha_V_j = np.tile(np.random.normal(size=M), N)
+
+    cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]])
+    alpha_eps_v = np.random.multivariate_normal(
+        np.zeros(2),
+        cov_mat,
+        size=[
+            N * M,
+        ],
+    )
+    alpha_eps = alpha_eps_v[:, 0]
+    alpha_v = alpha_eps_v[:, 1]
+
+    alpha_eps_v_i = np.random.multivariate_normal(
+        np.zeros(2),
+        cov_mat,
+        size=[
+            N,
+        ],
+    )
+    alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M)
+    alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M)
+
+    alpha_eps_v_j = np.random.multivariate_normal(
+        np.zeros(2),
+        cov_mat,
+        size=[
+            M,
+        ],
+    )
+    alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N)
+    alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N)
+
+    cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)])
+    alpha_X = np.random.multivariate_normal(
+        np.zeros(dim_X),
+        cov_mat,
+        size=[
+            N * M,
+        ],
+    )
+    alpha_X_i = np.repeat(
+        np.random.multivariate_normal(
+            np.zeros(dim_X),
+            cov_mat,
+            size=[
+                N,
+            ],
+        ),
+        M,
+        axis=0,
+    )
+    alpha_X_j = np.tile(
+        np.random.multivariate_normal(
+            np.zeros(dim_X),
+            cov_mat,
+            size=[
+                M,
+            ],
+        ),
+        (N, 1),
+    )
+
+    # generate variables
+    x = (1 - omega_X[0] - omega_X[1]) * alpha_X + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j
+
+    eps = (
+        (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j
+    )
+
+    v = (1 - omega_v[0] - omega_v[1]) * alpha_v + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j
+
+    V = (1 - omega_V[0] - omega_V[1]) * alpha_V + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j
+
+    z = np.matmul(x, xi_0) + V
+    d = z * pi_10 + np.matmul(x, pi_20) + v
+    y = d * theta + np.matmul(x, zeta_0) + eps
+
+    cluster_cols = ["cluster_var_i", "cluster_var_j"]
+    cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True)
+
+    if return_type in _array_alias:
+        return x, y, d, cluster_vars.values, z
+    elif return_type in _data_frame_alias + _dml_cluster_data_alias:
+        x_cols = [f"X{i + 1}" for i in np.arange(dim_X)]
+        data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1)
+        if return_type in _data_frame_alias:
+            return data
+        else:
+            return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z")
+    else:
+        raise ValueError("Invalid return_type.")
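A minimal usage sketch of `make_pliv_multiway_cluster_CKMS2021`; `pi_10` is one of the DGP parameters that the docstring above says can be overridden via `**kwargs`:

```python
import numpy as np
from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021

np.random.seed(3141)
# N x M = 625 observations with cluster variables cluster_var_i / cluster_var_j
obj_dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100)
# non-default DGP parameters are passed through **kwargs, e.g. pi_10
df = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, dim_X=20, pi_10=0.5, return_type="DataFrame")
```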
diff --git a/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py b/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py
new file mode 100644
index 000000000..7d6fdf9e8
--- /dev/null
+++ b/doubleml/plm/datasets/dgp_plr_CCDDHNR2018.py
@@ -0,0 +1,108 @@
+import numpy as np
+import pandas as pd
+from scipy.linalg import toeplitz
+
+from doubleml.data import DoubleMLData
+from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias
+
+_array_alias = _get_array_alias()
+_data_frame_alias = _get_data_frame_alias()
+_dml_data_alias = _get_dml_data_alias()
+
+
+def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", **kwargs):
+    """
+    Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1.
+    The data generating process is defined as
+
+    .. math::
+
+        d_i &= m_0(x_i) + s_1 v_i, & &v_i \\sim \\mathcal{N}(0,1),
+
+        y_i &= \\alpha d_i + g_0(x_i) + s_2 \\zeta_i, & &\\zeta_i \\sim \\mathcal{N}(0,1),
+
+
+    with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries
+    :math:`\\Sigma_{kj} = 0.7^{|j-k|}`.
+    The nuisance functions are given by
+
+    .. math::
+
+        m_0(x_i) &= a_0 x_{i,1} + a_1 \\frac{\\exp(x_{i,3})}{1+\\exp(x_{i,3})},
+
+        g_0(x_i) &= b_0 \\frac{\\exp(x_{i,1})}{1+\\exp(x_{i,1})} + b_1 x_{i,3}.
+
+    Parameters
+    ----------
+    n_obs :
+        The number of observations to simulate.
+    dim_x :
+        The number of covariates.
+    alpha :
+        The value of the causal parameter.
+    return_type :
+        If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object.
+
+        If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``.
+
+        If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``.
+    **kwargs
+        Additional keyword arguments to set non-default values for the parameters
+        :math:`a_0=1`, :math:`a_1=0.25`, :math:`s_1=1`, :math:`b_0=1`, :math:`b_1=0.25` or :math:`s_2=1`.
+
+    References
+    ----------
+    Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018),
+    Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68.
+    doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_.
+    """
+    a_0 = kwargs.get("a_0", 1.0)
+    a_1 = kwargs.get("a_1", 0.25)
+    s_1 = kwargs.get("s_1", 1.0)
+
+    b_0 = kwargs.get("b_0", 1.0)
+    b_1 = kwargs.get("b_1", 0.25)
+    s_2 = kwargs.get("s_2", 1.0)
+
+    cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)])
+    x = np.random.multivariate_normal(
+        np.zeros(dim_x),
+        cov_mat,
+        size=[
+            n_obs,
+        ],
+    )
+
+    d = (
+        a_0 * x[:, 0]
+        + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2]))
+        + s_1
+        * np.random.standard_normal(
+            size=[
+                n_obs,
+            ]
+        )
+    )
+    y = (
+        alpha * d
+        + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0]))
+        + b_1 * x[:, 2]
+        + s_2
+        * np.random.standard_normal(
+            size=[
+                n_obs,
+            ]
+        )
+    )
+
+    if return_type in _array_alias:
+        return x, y, d
+    elif return_type in _data_frame_alias + _dml_data_alias:
+        x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+        data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"])
+        if return_type in _data_frame_alias:
+            return data
+        else:
+            return DoubleMLData(data, "y", "d", x_cols)
+    else:
+        raise ValueError("Invalid return_type.")
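A minimal usage sketch of `make_plr_CCDDHNR2018` covering the three documented return types:

```python
import numpy as np
from doubleml.plm.datasets import make_plr_CCDDHNR2018

np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5)
df = make_plr_CCDDHNR2018(n_obs=500, return_type="DataFrame")
(x, y, d) = make_plr_CCDDHNR2018(n_obs=500, return_type="array")
```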
diff --git a/doubleml/plm/datasets/dgp_plr_turrell2018.py b/doubleml/plm/datasets/dgp_plr_turrell2018.py
new file mode 100644
index 000000000..5cfefdd8f
--- /dev/null
+++ b/doubleml/plm/datasets/dgp_plr_turrell2018.py
@@ -0,0 +1,107 @@
+import numpy as np
+import pandas as pd
+from sklearn.datasets import make_spd_matrix
+
+from doubleml.data import DoubleMLData
+from doubleml.utils._aliases import _get_array_alias, _get_data_frame_alias, _get_dml_data_alias
+
+_array_alias = _get_array_alias()
+_data_frame_alias = _get_data_frame_alias()
+_dml_data_alias = _get_dml_data_alias()
+
+
+def _g(x):
+    return np.power(np.sin(x), 2)
+
+
+def _m(x, nu=0.0, gamma=1.0):
+    return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu))
+
+
+def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type="DoubleMLData", **kwargs):
+    """
+    Generates data from a partially linear regression model used in a blog article by Turrell (2018).
+    The data generating process is defined as
+
+    .. math::
+
+        d_i &= m_0(x_i' b) + v_i, & &v_i \\sim \\mathcal{N}(0,1),
+
+        y_i &= \\theta d_i + g_0(x_i' b) + u_i, & &u_i \\sim \\mathcal{N}(0,1),
+
+
+    with covariates :math:`x_i \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a random symmetric,
+    positive-definite matrix generated with :py:meth:`sklearn.datasets.make_spd_matrix`.
+    :math:`b` is a vector with entries :math:`b_j=\\frac{1}{j}` and the nuisance functions are given by
+
+    .. math::
+
+        m_0(x_i) &= \\frac{1}{2 \\pi} \\frac{\\sinh(\\gamma)}{\\cosh(\\gamma) - \\cos(x_i-\\nu)},
+
+        g_0(x_i) &= \\sin(x_i)^2.
+
+    Parameters
+    ----------
+    n_obs :
+        The number of observations to simulate.
+    dim_x :
+        The number of covariates.
+    theta :
+        The value of the causal parameter.
+    return_type :
+        If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object.
+
+        If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``.
+
+        If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``.
+    **kwargs
+        Additional keyword arguments to set non-default values for the parameters
+        :math:`\\nu=0`, or :math:`\\gamma=1`.
+
+    References
+    ----------
+    Turrell, A. (2018), Econometrics in Python part I - Double machine learning, Markov Wanderer: A blog on economics,
+    science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/
+    <https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/>`_.
+    """
+    nu = kwargs.get("nu", 0.0)
+    gamma = kwargs.get("gamma", 1.0)
+
+    b = [1 / k for k in range(1, dim_x + 1)]
+    sigma = make_spd_matrix(dim_x)
+
+    x = np.random.multivariate_normal(
+        np.zeros(dim_x),
+        sigma,
+        size=[
+            n_obs,
+        ],
+    )
+    G = _g(np.dot(x, b))
+    M = _m(np.dot(x, b), nu=nu, gamma=gamma)
+    d = M + np.random.standard_normal(
+        size=[
+            n_obs,
+        ]
+    )
+    y = (
+        np.dot(theta, d)
+        + G
+        + np.random.standard_normal(
+            size=[
+                n_obs,
+            ]
+        )
+    )
+
+    if return_type in _array_alias:
+        return x, y, d
+    elif return_type in _data_frame_alias + _dml_data_alias:
+        x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+        data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"])
+        if return_type in _data_frame_alias:
+            return data
+        else:
+            return DoubleMLData(data, "y", "d", x_cols)
+    else:
+        raise ValueError("Invalid return_type.")
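A minimal usage sketch of `make_plr_turrell2018`; `nu` and `gamma` are the `**kwargs` documented above, shown here with illustrative non-default values:

```python
import numpy as np
from doubleml.plm.datasets import make_plr_turrell2018

np.random.seed(3141)
dml_data = make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5)
# nu and gamma of the nuisance m_0 can be overridden via **kwargs
dml_data_alt = make_plr_turrell2018(n_obs=100, nu=0.1, gamma=0.5)
```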
diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py
index ba0226889..52cb796d3 100644
--- a/doubleml/plm/pliv.py
+++ b/doubleml/plm/pliv.py
@@ -62,7 +62,7 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML):
     --------
     >>> import numpy as np
     >>> import doubleml as dml
-    >>> from doubleml.datasets import make_pliv_CHS2015
+    >>> from doubleml.plm.datasets import make_pliv_CHS2015
     >>> from sklearn.ensemble import RandomForestRegressor
     >>> from sklearn.base import clone
     >>> np.random.seed(3141)
diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py
index a81bac48c..4a57dfcb0 100644
--- a/doubleml/plm/plr.py
+++ b/doubleml/plm/plr.py
@@ -60,7 +60,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML):
     --------
     >>> import numpy as np
     >>> import doubleml as dml
-    >>> from doubleml.datasets import make_plr_CCDDHNR2018
+    >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018
     >>> from sklearn.ensemble import RandomForestRegressor
     >>> from sklearn.base import clone
     >>> np.random.seed(3141)
diff --git a/doubleml/plm/tests/conftest.py b/doubleml/plm/tests/conftest.py
index 497d6fc9d..cfde0f414 100644
--- a/doubleml/plm/tests/conftest.py
+++ b/doubleml/plm/tests/conftest.py
@@ -4,7 +4,7 @@
 from scipy.linalg import toeplitz
 from sklearn.datasets import make_spd_matrix
 
-from doubleml.datasets import make_pliv_CHS2015, make_plr_turrell2018
+from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_turrell2018
 
 
 def _g(x):
diff --git a/doubleml/plm/tests/test_pliv_external_predictions.py b/doubleml/plm/tests/test_pliv_external_predictions.py
index bc8a1e8a4..55c362abe 100644
--- a/doubleml/plm/tests/test_pliv_external_predictions.py
+++ b/doubleml/plm/tests/test_pliv_external_predictions.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression
 
 from doubleml import DoubleMLData, DoubleMLPLIV
-from doubleml.datasets import make_pliv_CHS2015
+from doubleml.plm.datasets import make_pliv_CHS2015
 from doubleml.utils import DMLDummyRegressor
diff --git a/doubleml/plm/tests/test_plr.py b/doubleml/plm/tests/test_plr.py
index 79f21f849..65f5ad834 100644
--- a/doubleml/plm/tests/test_plr.py
+++ b/doubleml/plm/tests/test_plr.py
@@ -304,7 +304,7 @@ def test_dml_plr_cate_gate(score, cov_type):
 
     # collect data
     np.random.seed(42)
-    obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n)
+    obj_dml_data = dml.plm.datasets.make_plr_CCDDHNR2018(n_obs=n)
     ml_l = LinearRegression()
     ml_g = LinearRegression()
     ml_m = LinearRegression()
diff --git a/doubleml/plm/tests/test_plr_external_predictions.py b/doubleml/plm/tests/test_plr_external_predictions.py
index 47644555d..160052b14 100644
--- a/doubleml/plm/tests/test_plr_external_predictions.py
+++ b/doubleml/plm/tests/test_plr_external_predictions.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression
 
 from doubleml import DoubleMLData, DoubleMLPLR
-from doubleml.datasets import make_plr_CCDDHNR2018
+from doubleml.plm.datasets import make_plr_CCDDHNR2018
 from doubleml.utils import DMLDummyRegressor
diff --git a/doubleml/tests/conftest.py b/doubleml/tests/conftest.py
index bf53d788f..6abea18c1 100644
--- a/doubleml/tests/conftest.py
+++ b/doubleml/tests/conftest.py
@@ -4,7 +4,7 @@
 from sklearn.datasets import make_classification, make_regression, make_spd_matrix
 
 from doubleml import DoubleMLData
-from doubleml.datasets import make_pliv_CHS2015, make_plr_turrell2018
+from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_turrell2018
 
 
 def _g(x):
diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py
index 67f612e80..8f1c4f033 100644
--- a/doubleml/tests/test_datasets.py
+++ b/doubleml/tests/test_datasets.py
@@ -3,21 +3,22 @@
 import pytest
 
 from doubleml import DoubleMLClusterData, DoubleMLData
-from doubleml.datasets import (
-    _make_pliv_data,
-    fetch_401K,
-    fetch_bonus,
+from doubleml.datasets import fetch_401K, fetch_bonus
+from doubleml.irm.datasets import (
     make_confounded_irm_data,
-    make_confounded_plr_data,
     make_heterogeneous_data,
     make_iivm_data,
     make_irm_data,
     make_irm_data_discrete_treatments,
+    make_ssm_data,
+)
+from doubleml.plm.datasets import (
+    _make_pliv_data,
+    make_confounded_plr_data,
     make_pliv_CHS2015,
     make_pliv_multiway_cluster_CKMS2021,
     make_plr_CCDDHNR2018,
     make_plr_turrell2018,
-    make_ssm_data,
 )
 
 msg_inv_return_type = "Invalid return_type."
diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py
index dbad9b620..2c5d3f9a6 100644
--- a/doubleml/tests/test_evaluate_learner.py
+++ b/doubleml/tests/test_evaluate_learner.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.datasets import make_irm_data
+from doubleml.irm.datasets import make_irm_data
 from doubleml.utils._estimation import _logloss
 
 np.random.seed(3141)
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index a4655bb9d..d8fe4e7cb 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -21,13 +21,8 @@
     DoubleMLPQ,
     DoubleMLQTE,
 )
-from doubleml.datasets import (
-    make_iivm_data,
-    make_irm_data,
-    make_pliv_CHS2015,
-    make_pliv_multiway_cluster_CKMS2021,
-    make_plr_CCDDHNR2018,
-)
+from doubleml.irm.datasets import make_iivm_data, make_irm_data
+from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
 from doubleml.did.datasets import make_did_SZ2020
 
 from ._utils import DummyDataClass
diff --git a/doubleml/tests/test_exceptions_ext_preds.py b/doubleml/tests/test_exceptions_ext_preds.py
index 3f6002825..a65b6ebb7 100644
--- a/doubleml/tests/test_exceptions_ext_preds.py
+++ b/doubleml/tests/test_exceptions_ext_preds.py
@@ -2,7 +2,7 @@
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 from doubleml import DoubleMLCVAR, DoubleMLData, DoubleMLIRM, DoubleMLQTE
-from doubleml.datasets import make_irm_data
+from doubleml.irm.datasets import make_irm_data
 from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
 
 df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame")
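The remaining hunks all apply the same import migration. A sketch of the pattern, using only paths that appear in the hunks above and below (per `test_datasets.py`, the fetchers `fetch_401K` and `fetch_bonus` stay importable from `doubleml.datasets`):

```python
# before this change:
# from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, fetch_401K

# after this change, generators live next to their model class:
from doubleml.plm.datasets import make_plr_CCDDHNR2018, make_pliv_CHS2015
from doubleml.irm.datasets import make_irm_data, make_iivm_data
# the data fetchers are still imported from doubleml.datasets:
from doubleml.datasets import fetch_401K, fetch_bonus
```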
diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py
index 24810b680..44dabb713 100644
--- a/doubleml/tests/test_framework.py
+++ b/doubleml/tests/test_framework.py
@@ -3,7 +3,7 @@
 import pytest
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
-from doubleml.datasets import make_irm_data
+from doubleml.irm.datasets import make_irm_data
 from doubleml.double_ml_framework import DoubleMLFramework, concat
 from doubleml.irm.irm import DoubleMLIRM
diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py
index f55a555cc..8417468ad 100644
--- a/doubleml/tests/test_model_defaults.py
+++ b/doubleml/tests/test_model_defaults.py
@@ -4,13 +4,8 @@
 from sklearn.linear_model import Lasso, LogisticRegression
 
 import doubleml as dml
-from doubleml.datasets import (
-    make_iivm_data,
-    make_irm_data,
-    make_pliv_CHS2015,
-    make_plr_CCDDHNR2018,
-    make_ssm_data,
-)
+from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_ssm_data
+from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018
 from doubleml.did.datasets import make_did_SZ2020
 
 np.random.seed(3141)
diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py
index b064024f0..10e5d4457 100644
--- a/doubleml/tests/test_multiway_cluster.py
+++ b/doubleml/tests/test_multiway_cluster.py
@@ -6,7 +6,7 @@
 from sklearn.linear_model import Lasso, LinearRegression
 
 import doubleml as dml
-from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021
 
 from ..plm.tests._utils_pliv_manual import compute_pliv_residuals, fit_pliv
 from ._utils import _clone
diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py
index f84f3e2e9..71998941c 100644
--- a/doubleml/tests/test_nonlinear_cluster.py
+++ b/doubleml/tests/test_nonlinear_cluster.py
@@ -7,7 +7,8 @@
 from sklearn.linear_model import Lasso, LinearRegression
 
 import doubleml as dml
-from doubleml.datasets import DoubleMLClusterData, make_pliv_multiway_cluster_CKMS2021
+from doubleml import DoubleMLClusterData
+from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021
 
 from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin
diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py
index 11ebd6248..03676b747 100644
--- a/doubleml/tests/test_return_types.py
+++ b/doubleml/tests/test_return_types.py
@@ -23,14 +23,8 @@
     DoubleMLPQ,
     DoubleMLSSM,
 )
-from doubleml.datasets import (
-    make_iivm_data,
-    make_irm_data,
-    make_pliv_CHS2015,
-    make_pliv_multiway_cluster_CKMS2021,
-    make_plr_CCDDHNR2018,
-    make_ssm_data,
-)
+from doubleml.irm.datasets import make_iivm_data, make_irm_data, make_ssm_data
+from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
 from doubleml.did.datasets import make_did_SZ2020
 
 np.random.seed(3141)
diff --git a/doubleml/tests/test_scores.py b/doubleml/tests/test_scores.py
index c3281702d..0687546dc 100644
--- a/doubleml/tests/test_scores.py
+++ b/doubleml/tests/test_scores.py
@@ -3,7 +3,8 @@
 from sklearn.linear_model import Lasso, LogisticRegression
 
 from doubleml import DoubleMLIIVM, DoubleMLIRM, DoubleMLPLIV, DoubleMLPLR
-from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018
+from doubleml.irm.datasets import make_iivm_data, make_irm_data
+from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018
 
 np.random.seed(3141)
 dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index e4b434959..a0e47c0da 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.datasets import make_irm_data
+from doubleml.irm.datasets import make_irm_data
 
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual, doubleml_sensitivity_manual
diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py
index 65ec0d644..83f8c270c 100644
--- a/doubleml/tests/test_sensitivity_cluster.py
+++ b/doubleml/tests/test_sensitivity_cluster.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression
 
 import doubleml as dml
-from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+from doubleml.plm.datasets import make_pliv_multiway_cluster_CKMS2021
 
 from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual
diff --git a/doubleml/tests/test_set_ml_nuisance_params.py b/doubleml/tests/test_set_ml_nuisance_params.py
index a189b184b..055bcbff7 100644
--- a/doubleml/tests/test_set_ml_nuisance_params.py
+++ b/doubleml/tests/test_set_ml_nuisance_params.py
@@ -3,7 +3,8 @@
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 from doubleml import DoubleMLCVAR, DoubleMLIIVM, DoubleMLIRM, DoubleMLLPQ, DoubleMLPLIV, DoubleMLPLR, DoubleMLPQ
-from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018
+from doubleml.irm.datasets import make_iivm_data, make_irm_data
+from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018
 
 # set default and test values
 n_est_default = 100
diff --git a/doubleml/tests/test_set_sample_splitting.py b/doubleml/tests/test_set_sample_splitting.py
index 97313a00e..0995d831b 100644
--- a/doubleml/tests/test_set_sample_splitting.py
+++ b/doubleml/tests/test_set_sample_splitting.py
@@ -3,7 +3,7 @@
 from sklearn.linear_model import Lasso
 
 from doubleml import DoubleMLPLR
-from doubleml.datasets import make_plr_CCDDHNR2018
+from doubleml.plm.datasets import make_plr_CCDDHNR2018
 
 np.random.seed(3141)
 dml_data = make_plr_CCDDHNR2018(n_obs=10)