diff --git a/.github/ISSUE_TEMPLATE/api_docu.yml b/.github/ISSUE_TEMPLATE/api_docu.yml
index c2004a531..dd23ee0b4 100644
--- a/.github/ISSUE_TEMPLATE/api_docu.yml
+++ b/.github/ISSUE_TEMPLATE/api_docu.yml
@@ -11,4 +11,4 @@ body:
required: true
- type: textarea
attributes:
- label: Suggested alternative or fix
\ No newline at end of file
+ label: Suggested alternative or fix
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index f6f09dd41..5cf5c8000 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -76,4 +76,4 @@ body:
import sklearn; print("Scikit-Learn", sklearn.__version__)
```
validations:
- required: true
\ No newline at end of file
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 43088fb56..2ec778f0c 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -17,4 +17,4 @@ body:
      label: Did you consider alternatives to the proposed solution? If yes, please describe
- type: textarea
attributes:
- label: Comments, context or references
\ No newline at end of file
+ label: Comments, context or references
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 0fb27f571..f1ce2941c 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -43,14 +43,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- python -m pip install flake8
pip install -e .[dev,rdd]
- - name: Lint with flake8
+ - name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ ruff check . --select E9,F63,F7,F82 --output-format=full --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+ ruff check . --exit-zero --line-length 127 --output-format=full --statistics
+ - name: Check code formatting with black
+ run: |
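+        # black --check fails the step if any file would be reformatted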
+ black --check .
- name: Test with pytest
if: |
matrix.config.os != 'ubuntu-latest' ||
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..0a391f696
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,17 @@
+repos:
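+# generic file hygiene hooks: YAML syntax, final newlines, trailing whitespace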
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+- repo: https://github.com/astral-sh/ruff-pre-commit
+ # Ruff version.
+ rev: v0.9.1
+ hooks:
+ - id: ruff
+ args: ["--fix", "--output-format=full"]
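+# Black formatting (mirrors the black --check step in CI)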
+- repo: https://github.com/psf/black
+ rev: 24.10.0
+ hooks:
+ - id: black
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 5cc83c2c6..ff4c7c8c4 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -130,4 +130,4 @@ enforcement ladder](https://github.com/mozilla/diversity).
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
-https://www.contributor-covenant.org/translations.
\ No newline at end of file
+https://www.contributor-covenant.org/translations.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4151eeaf9..bd6a465d5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -81,7 +81,7 @@ $ git merge upstream/main
5. **Install DoubleML in editable mode** (more details can be found
[here](https://docs.doubleml.org/stable/intro/install.html#python-building-the-package-from-source))
-via
+via
```bash
$ pip install --editable .[dev,rdd]
```
@@ -119,12 +119,34 @@ $ pytest .
- [x] Check whether your changes adhere to the **PEP8 standards**.
For the check you can use the following code
```bash
-$ git diff upstream/main -u -- "*.py" | flake8 --diff --max-line-length=127
+$ ruff check $(git diff upstream/main --name-only -- "*.py")
+```
+
+- [x] Check whether the code formatting adheres to the **Black code style**
+by running
+```bash
+$ black . --check --diff
```
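+
+To let Black fix the formatting in place, the same command can be run without the check flags:
+```bash
+$ black .
+```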
If your PR is still **work in progress**, please consider marking it a **draft PR**
(see also [here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request)).
+### (Optional) Set up pre-commit Hooks
+
+To ensure code quality and consistency before committing your changes, we recommend using [pre-commit hooks](https://pre-commit.com/), which automatically run checks like code formatting and linting on your staged files.
+
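+If the `pre-commit` tool is not yet available in your environment, it can be installed first, for example via
+```bash
+$ pip install pre-commit
+```
+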
+1. **Install hooks**:
+   If you haven't already, install the git hook scripts by running:
+ ```bash
+ $ pre-commit install
+ ```
+
+2. **Run pre-commit manually**:
+ To run the pre-commit checks manually, use:
+ ```bash
+ $ pre-commit run --all-files
+ ```
+
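+Once installed, the hooks run automatically on every `git commit`; if a hook fails or modifies a file, the commit is aborted so that the changes can be reviewed and re-staged.
+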
### Unit Tests and Test Coverage
We use the package **pytest** for unit testing.
Unit testing is considered to be a fundamental part of the development workflow.
@@ -165,7 +187,7 @@ The source code for the website, user guide, example gallery, etc. is available
### Contribute to the API Documentation
The **API documentation** is generated from **docstrings** in the source code.
-It can be generated locally (dev requirements sphinx and pydata-sphinx-theme need to be installed) via
+It can be generated locally (dev requirements sphinx and pydata-sphinx-theme need to be installed) via
```bash
$ cd doc/
$ make html
@@ -175,7 +197,7 @@ $ make html
The **documentation of DoubleML** is hosted at [https://docs.doubleml.org](https://docs.doubleml.org).
The **source code** for the website, user guide, example gallery, etc. is available in a **separate repository
[doubleml-docs](https://github.com/DoubleML/doubleml-docs)**.
-Changes, issues and PRs for the documentation (except the API documentation) should be discussed in the
+Changes, issues and PRs for the documentation (except the API documentation) should be discussed in the
[doubleml-docs](https://github.com/DoubleML/doubleml-docs) repo.
We welcome contributions to the user guide, especially case studies for the
[example gallery](https://docs.doubleml.org/stable/examples/index.html).
diff --git a/LICENSE b/LICENSE
index f20a88b19..d4d3f03ce 100644
--- a/LICENSE
+++ b/LICENSE
@@ -26,4 +26,4 @@ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
index cb5457a94..f23447ba7 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,2 @@
include LICENSE
-include pytest.ini
\ No newline at end of file
+include pytest.ini
diff --git a/README.md b/README.md
index e713d494d..37a1894e8 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ The Python package **DoubleML** provides an implementation of the double / debia
It is built on top of [scikit-learn](https://scikit-learn.org) (Pedregosa et al., 2011).
Note that the Python package was developed together with an R twin based on [mlr3](https://mlr3.mlr-org.com/).
-The R package is also available on [GitHub](https://github.com/DoubleML/doubleml-for-r) and
+The R package is also available on [GitHub](https://github.com/DoubleML/doubleml-for-r) and
[CRAN](https://cran.r-project.org/package=DoubleML).
## Documentation and Maintenance
@@ -27,7 +27,7 @@ Bugs can be reported to the issue tracker at
## Main Features
-Double / debiased machine learning [(Chernozhukov et al. (2018))](https://doi.org/10.1111/ectj.12097) for
+Double / debiased machine learning [(Chernozhukov et al. (2018))](https://doi.org/10.1111/ectj.12097) for
- Partially linear regression models (PLR)
- Partially linear IV regression models (PLIV)
@@ -46,14 +46,14 @@ This object-oriented implementation allows a high flexibility for the model spec
- ... the resampling schemes,
- ... the double machine learning algorithm,
- ... the Neyman orthogonal score functions,
-- ...
+- ...
It further can be readily extended with regards to
- ... new model classes that come with Neyman orthogonal score functions being linear in the target parameter,
- ... alternative score functions via callables,
- ... alternative resampling schemes,
-- ...
+- ...

@@ -106,7 +106,7 @@ Bibtex-entry:
```
@article{DoubleML2022,
- title = {{DoubleML} -- {A}n Object-Oriented Implementation of Double Machine Learning in {P}ython},
+ title = {{DoubleML} -- {A}n Object-Oriented Implementation of Double Machine Learning in {P}ython},
author = {Philipp Bach and Victor Chernozhukov and Malte S. Kurz and Martin Spindler},
journal = {Journal of Machine Learning Research},
year = {2022},
diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst
index b4ec353ca..c7ca556c1 100644
--- a/doc/_templates/class.rst
+++ b/doc/_templates/class.rst
@@ -34,4 +34,4 @@
.. automethod:: {{ name }}.{{ item }}
{% endif %}
{%- endfor %}
-{% endif %}
\ No newline at end of file
+{% endif %}
diff --git a/doc/conf.py b/doc/conf.py
index 9570f7aef..b4ce1c05b 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -12,17 +12,18 @@
#
import os
import sys
-sys.path.insert(0, os.path.abspath('..'))
+
+sys.path.insert(0, os.path.abspath(".."))
# -- Project information -----------------------------------------------------
-project = 'DoubleML'
-copyright = '2021, Bach, P., Chernozhukov, V., Klaassen, S., Kurz, M. S., and Spindler, M.'
-author = 'Bach, P., Chernozhukov, V., Klaassen, S., Kurz, M. S., and Spindler, M.'
+project = "DoubleML"
+copyright = "2021, Bach, P., Chernozhukov, V., Klaassen, S., Kurz, M. S., and Spindler, M."
+author = "Bach, P., Chernozhukov, V., Klaassen, S., Kurz, M. S., and Spindler, M."
# The full version, including alpha/beta/rc tags
-release = '0.10.dev0'
+release = "0.10.dev0"
# -- General configuration ---------------------------------------------------
@@ -31,25 +32,25 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.coverage',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.napoleon',
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.coverage",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.napoleon",
]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
-master_doc = 'index'
+master_doc = "index"
-autoclass_content = 'class'
+autoclass_content = "class"
autosummary_generate = True
# -- Options for HTML output -------------------------------------------------
@@ -57,12 +58,12 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'pydata_sphinx_theme'
+html_theme = "pydata_sphinx_theme"
html_theme_options = {
- 'github_url': 'https://github.com/DoubleML/doubleml-for-py',
- 'navigation_with_keys': False,
- 'show_toc_level': 0
+ "github_url": "https://github.com/DoubleML/doubleml-for-py",
+ "navigation_with_keys": False,
+ "show_toc_level": 0,
}
# Add any paths that contain custom static files (such as style sheets) here,
@@ -74,9 +75,9 @@
# intersphinx configuration
intersphinx_mapping = {
- 'python': ('https://docs.python.org/{.major}'.format(sys.version_info), None),
- 'sklearn': ('https://scikit-learn.org/stable/', None),
- 'numpy': ('https://numpy.org/doc/stable/', None),
- 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
- 'statsmodels': ('https://www.statsmodels.org/stable/', None),
+ "python": ("https://docs.python.org/{.major}".format(sys.version_info), None),
+ "sklearn": ("https://scikit-learn.org/stable/", None),
+ "numpy": ("https://numpy.org/doc/stable/", None),
+ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+ "statsmodels": ("https://www.statsmodels.org/stable/", None),
}
diff --git a/doc/oop.svg b/doc/oop.svg
index c63dc8404..9b2d1ed80 100644
--- a/doc/oop.svg
+++ b/doc/oop.svg
@@ -2419,4 +2419,4 @@
-</svg>
\ No newline at end of file
+</svg>
diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index c97bddf79..a86735c89 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -1,45 +1,43 @@
import importlib.metadata
-from .double_ml_framework import concat
-from .double_ml_framework import DoubleMLFramework
-from .plm.plr import DoubleMLPLR
-from .plm.pliv import DoubleMLPLIV
-from .irm.irm import DoubleMLIRM
+from .did.did import DoubleMLDID
+from .did.did_cs import DoubleMLDIDCS
+from .double_ml_data import DoubleMLClusterData, DoubleMLData
+from .double_ml_framework import DoubleMLFramework, concat
from .irm.apo import DoubleMLAPO
from .irm.apos import DoubleMLAPOS
+from .irm.cvar import DoubleMLCVAR
from .irm.iivm import DoubleMLIIVM
-from .double_ml_data import DoubleMLData, DoubleMLClusterData
-from .did.did import DoubleMLDID
-from .did.did_cs import DoubleMLDIDCS
-from .irm.qte import DoubleMLQTE
-from .irm.pq import DoubleMLPQ
+from .irm.irm import DoubleMLIRM
from .irm.lpq import DoubleMLLPQ
-from .irm.cvar import DoubleMLCVAR
+from .irm.pq import DoubleMLPQ
+from .irm.qte import DoubleMLQTE
from .irm.ssm import DoubleMLSSM
-
+from .plm.pliv import DoubleMLPLIV
+from .plm.plr import DoubleMLPLR
from .utils.blp import DoubleMLBLP
from .utils.policytree import DoubleMLPolicyTree
__all__ = [
- 'concat',
- 'DoubleMLFramework',
- 'DoubleMLPLR',
- 'DoubleMLPLIV',
- 'DoubleMLIRM',
- 'DoubleMLAPO',
- 'DoubleMLAPOS',
- 'DoubleMLIIVM',
- 'DoubleMLData',
- 'DoubleMLClusterData',
- 'DoubleMLDID',
- 'DoubleMLDIDCS',
- 'DoubleMLPQ',
- 'DoubleMLQTE',
- 'DoubleMLLPQ',
- 'DoubleMLCVAR',
- 'DoubleMLBLP',
- 'DoubleMLPolicyTree',
- 'DoubleMLSSM'
+ "concat",
+ "DoubleMLFramework",
+ "DoubleMLPLR",
+ "DoubleMLPLIV",
+ "DoubleMLIRM",
+ "DoubleMLAPO",
+ "DoubleMLAPOS",
+ "DoubleMLIIVM",
+ "DoubleMLData",
+ "DoubleMLClusterData",
+ "DoubleMLDID",
+ "DoubleMLDIDCS",
+ "DoubleMLPQ",
+ "DoubleMLQTE",
+ "DoubleMLLPQ",
+ "DoubleMLCVAR",
+ "DoubleMLBLP",
+ "DoubleMLPolicyTree",
+ "DoubleMLSSM",
]
-__version__ = importlib.metadata.version('doubleml')
+__version__ = importlib.metadata.version("doubleml")
diff --git a/doubleml/datasets.py b/doubleml/datasets.py
index 74eceb0fc..8be9cd4ef 100644
--- a/doubleml/datasets.py
+++ b/doubleml/datasets.py
@@ -1,22 +1,21 @@
-import pandas as pd
-import numpy as np
import warnings
+import numpy as np
+import pandas as pd
from scipy.linalg import toeplitz
from scipy.optimize import minimize_scalar
-
-from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder
from sklearn.datasets import make_spd_matrix
+from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures
-from .double_ml_data import DoubleMLData, DoubleMLClusterData
+from .double_ml_data import DoubleMLClusterData, DoubleMLData
-_array_alias = ['array', 'np.ndarray', 'np.array', np.ndarray]
-_data_frame_alias = ['DataFrame', 'pd.DataFrame', pd.DataFrame]
-_dml_data_alias = ['DoubleMLData', DoubleMLData]
-_dml_cluster_data_alias = ['DoubleMLClusterData', DoubleMLClusterData]
+_array_alias = ["array", "np.ndarray", "np.array", np.ndarray]
+_data_frame_alias = ["DataFrame", "pd.DataFrame", pd.DataFrame]
+_dml_data_alias = ["DoubleMLData", DoubleMLData]
+_dml_cluster_data_alias = ["DoubleMLClusterData", DoubleMLClusterData]
-def fetch_401K(return_type='DoubleMLData', polynomial_features=False):
+def fetch_401K(return_type="DoubleMLData", polynomial_features=False):
"""
Data set on financial wealth and 401(k) plan participation.
@@ -38,17 +37,17 @@ def fetch_401K(return_type='DoubleMLData', polynomial_features=False):
Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68.
    doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_.
"""
- url = 'https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta'
+ url = "https://github.com/VC2015/DMLonGitHub/raw/master/sipp1991.dta"
raw_data = pd.read_stata(url)
- y_col = 'net_tfa'
- d_cols = ['e401']
- x_cols = ['age', 'inc', 'educ', 'fsize', 'marr', 'twoearn', 'db', 'pira', 'hown']
+ y_col = "net_tfa"
+ d_cols = ["e401"]
+ x_cols = ["age", "inc", "educ", "fsize", "marr", "twoearn", "db", "pira", "hown"]
data = raw_data.copy()
if polynomial_features:
- raise NotImplementedError('polynomial_features os not implemented yet for fetch_401K.')
+        raise NotImplementedError("polynomial_features is not implemented yet for fetch_401K.")
if return_type in _data_frame_alias + _dml_data_alias:
if return_type in _data_frame_alias:
@@ -56,10 +55,10 @@ def fetch_401K(return_type='DoubleMLData', polynomial_features=False):
else:
return DoubleMLData(data, y_col, d_cols, x_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def fetch_bonus(return_type='DoubleMLData', polynomial_features=False):
+def fetch_bonus(return_type="DoubleMLData", polynomial_features=False):
"""
Data set on the Pennsylvania Reemployment Bonus experiment.
@@ -81,27 +80,40 @@ def fetch_bonus(return_type='DoubleMLData', polynomial_features=False):
Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68.
    doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_.
"""
- url = 'https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat'
- raw_data = pd.read_csv(url, sep='\s+')
+ url = "https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat"
+ raw_data = pd.read_csv(url, sep=r"\s+")
- ind = (raw_data['tg'] == 0) | (raw_data['tg'] == 4)
+ ind = (raw_data["tg"] == 0) | (raw_data["tg"] == 4)
data = raw_data.copy()[ind]
data.reset_index(inplace=True)
- data['tg'] = data['tg'].replace(4, 1)
- data['inuidur1'] = np.log(data['inuidur1'])
+ data["tg"] = data["tg"].replace(4, 1)
+ data["inuidur1"] = np.log(data["inuidur1"])
# variable dep as factor (dummy encoding)
- dummy_enc = OneHotEncoder(drop='first', categories='auto').fit(data.loc[:, ['dep']])
- xx = dummy_enc.transform(data.loc[:, ['dep']]).toarray()
- data['dep1'] = xx[:, 0]
- data['dep2'] = xx[:, 1]
-
- y_col = 'inuidur1'
- d_cols = ['tg']
- x_cols = ['female', 'black', 'othrace',
- 'dep1', 'dep2',
- 'q2', 'q3', 'q4', 'q5', 'q6',
- 'agelt35', 'agegt54', 'durable', 'lusd', 'husd']
+ dummy_enc = OneHotEncoder(drop="first", categories="auto").fit(data.loc[:, ["dep"]])
+ xx = dummy_enc.transform(data.loc[:, ["dep"]]).toarray()
+ data["dep1"] = xx[:, 0]
+ data["dep2"] = xx[:, 1]
+
+ y_col = "inuidur1"
+ d_cols = ["tg"]
+ x_cols = [
+ "female",
+ "black",
+ "othrace",
+ "dep1",
+ "dep2",
+ "q2",
+ "q3",
+ "q4",
+ "q5",
+ "q6",
+ "agelt35",
+ "agegt54",
+ "durable",
+ "lusd",
+ "husd",
+ ]
if polynomial_features:
poly = PolynomialFeatures(2, include_bias=False)
@@ -109,8 +121,7 @@ def fetch_bonus(return_type='DoubleMLData', polynomial_features=False):
x_cols = list(poly.get_feature_names_out(x_cols))
data_transf = pd.DataFrame(data_transf, columns=x_cols)
- data = pd.concat((data[[y_col] + d_cols], data_transf),
- axis=1, sort=False)
+ data = pd.concat((data[[y_col] + d_cols], data_transf), axis=1, sort=False)
if return_type in _data_frame_alias + _dml_data_alias:
if return_type in _data_frame_alias:
@@ -118,18 +129,18 @@ def fetch_bonus(return_type='DoubleMLData', polynomial_features=False):
else:
return DoubleMLData(data, y_col, d_cols, x_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
def _g(x):
return np.power(np.sin(x), 2)
-def _m(x, nu=0., gamma=1.):
- return 0.5/np.pi*(np.sinh(gamma))/(np.cosh(gamma)-np.cos(x-nu))
+def _m(x, nu=0.0, gamma=1.0):
+ return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu))
-def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLData', **kwargs):
+def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="DoubleMLData", **kwargs):
"""
Generates data from a partially linear regression model used in Chernozhukov et al. (2018) for Figure 1.
The data generating process is defined as
@@ -175,37 +186,59 @@ def make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type='DoubleMLDa
Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68.
    doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_.
"""
- a_0 = kwargs.get('a_0', 1.)
- a_1 = kwargs.get('a_1', 0.25)
- s_1 = kwargs.get('s_1', 1.)
+ a_0 = kwargs.get("a_0", 1.0)
+ a_1 = kwargs.get("a_1", 0.25)
+ s_1 = kwargs.get("s_1", 1.0)
- b_0 = kwargs.get('b_0', 1.)
- b_1 = kwargs.get('b_1', 0.25)
- s_2 = kwargs.get('s_2', 1.)
+ b_0 = kwargs.get("b_0", 1.0)
+ b_1 = kwargs.get("b_1", 0.25)
+ s_2 = kwargs.get("s_2", 1.0)
cov_mat = toeplitz([np.power(0.7, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
- d = a_0 * x[:, 0] + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2])) \
- + s_1 * np.random.standard_normal(size=[n_obs, ])
- y = alpha * d + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0])) \
- + b_1 * x[:, 2] + s_2 * np.random.standard_normal(size=[n_obs, ])
+ d = (
+ a_0 * x[:, 0]
+ + a_1 * np.divide(np.exp(x[:, 2]), 1 + np.exp(x[:, 2]))
+ + s_1
+ * np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ )
+ y = (
+ alpha * d
+ + b_0 * np.divide(np.exp(x[:, 0]), 1 + np.exp(x[:, 0]))
+ + b_1 * x[:, 2]
+ + s_2
+ * np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ )
if return_type in _array_alias:
return x, y, d
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=x_cols + ['y', 'd'])
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols)
+ return DoubleMLData(data, "y", "d", x_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type='DoubleMLData', **kwargs):
+def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type="DoubleMLData", **kwargs):
"""
Generates data from a partially linear regression model used in a blog article by Turrell (2018).
The data generating process is defined as
@@ -251,33 +284,50 @@ def make_plr_turrell2018(n_obs=100, dim_x=20, theta=0.5, return_type='DoubleMLDa
science, coding and data. `https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/
    <https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/>`_.
"""
- nu = kwargs.get('nu', 0.)
- gamma = kwargs.get('gamma', 1.)
+ nu = kwargs.get("nu", 0.0)
+ gamma = kwargs.get("gamma", 1.0)
b = [1 / k for k in range(1, dim_x + 1)]
sigma = make_spd_matrix(dim_x)
- x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ sigma,
+ size=[
+ n_obs,
+ ],
+ )
G = _g(np.dot(x, b))
M = _m(np.dot(x, b), nu=nu, gamma=gamma)
- d = M + np.random.standard_normal(size=[n_obs, ])
- y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ])
+ d = M + np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ y = (
+ np.dot(theta, d)
+ + G
+ + np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ )
if return_type in _array_alias:
return x, y, d
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=x_cols + ['y', 'd'])
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols)
+ return DoubleMLData(data, "y", "d", x_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type='DoubleMLData'):
+def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type="DoubleMLData"):
"""
    Generates data from an interactive regression (IRM) model.
The data generating process is defined as
@@ -327,37 +377,50 @@ def make_irm_data(n_obs=500, dim_x=20, theta=0, R2_d=0.5, R2_y=0.5, return_type=
High‐Dimensional Data. Econometrica, 85: 233-298.
"""
    # inspired by https://onlinelibrary.wiley.com/doi/abs/10.3982/ECTA12723, see supplement
- v = np.random.uniform(size=[n_obs, ])
- zeta = np.random.standard_normal(size=[n_obs, ])
+ v = np.random.uniform(
+ size=[
+ n_obs,
+ ]
+ )
+ zeta = np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
beta = [1 / (k**2) for k in range(1, dim_x + 1)]
b_sigma_b = np.dot(np.dot(cov_mat, beta), beta)
- c_y = np.sqrt(R2_y/((1-R2_y) * b_sigma_b))
- c_d = np.sqrt(np.pi**2 / 3. * R2_d/((1-R2_d) * b_sigma_b))
+ c_y = np.sqrt(R2_y / ((1 - R2_y) * b_sigma_b))
+ c_d = np.sqrt(np.pi**2 / 3.0 * R2_d / ((1 - R2_d) * b_sigma_b))
xx = np.exp(np.dot(x, np.multiply(beta, c_d)))
- d = 1. * ((xx/(1+xx)) > v)
+ d = 1.0 * ((xx / (1 + xx)) > v)
y = d * theta + d * np.dot(x, np.multiply(beta, c_y)) + zeta
if return_type in _array_alias:
return x, y, d
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=x_cols + ['y', 'd'])
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + ["y", "d"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols)
+ return DoubleMLData(data, "y", "d", x_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_iivm_data(n_obs=500, dim_x=20, theta=1., alpha_x=0.2, return_type='DoubleMLData'):
+def make_iivm_data(n_obs=500, dim_x=20, theta=1.0, alpha_x=0.2, return_type="DoubleMLData"):
"""
    Generates data from an interactive IV regression (IIVM) model.
The data generating process is defined as
@@ -405,64 +468,100 @@ def make_iivm_data(n_obs=500, dim_x=20, theta=1., alpha_x=0.2, return_type='Doub
Paper No. 13-2020. Available at SSRN: http://dx.doi.org/10.2139/ssrn.3619201.
"""
# inspired by https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3619201
- xx = np.random.multivariate_normal(np.zeros(2),
- np.array([[1., 0.3], [0.3, 1.]]),
- size=[n_obs, ])
+ xx = np.random.multivariate_normal(
+ np.zeros(2),
+ np.array([[1.0, 0.3], [0.3, 1.0]]),
+ size=[
+ n_obs,
+ ],
+ )
u = xx[:, 0]
v = xx[:, 1]
cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
beta = [1 / (k**2) for k in range(1, dim_x + 1)]
- z = np.random.binomial(p=0.5, n=1, size=[n_obs, ])
- d = 1. * (alpha_x * z + v > 0)
+ z = np.random.binomial(
+ p=0.5,
+ n=1,
+ size=[
+ n_obs,
+ ],
+ )
+ d = 1.0 * (alpha_x * z + v > 0)
y = d * theta + np.dot(x, beta) + u
if return_type in _array_alias:
return x, y, d, z
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((x, y, d, z)),
- columns=x_cols + ['y', 'd', 'z'])
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols, 'z')
+ return DoubleMLData(data, "y", "d", x_cols, "z")
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type='DoubleMLData'):
- b = [1/k for k in range(1, dim_x+1)]
+def _make_pliv_data(n_obs=100, dim_x=20, theta=0.5, gamma_z=0.4, return_type="DoubleMLData"):
+ b = [1 / k for k in range(1, dim_x + 1)]
sigma = make_spd_matrix(dim_x)
- x = np.random.multivariate_normal(np.zeros(dim_x), sigma, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ sigma,
+ size=[
+ n_obs,
+ ],
+ )
G = _g(np.dot(x, b))
# instrument
- z = _m(np.dot(x, b)) + np.random.standard_normal(size=[n_obs, ])
+ z = _m(np.dot(x, b)) + np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
# treatment
M = _m(gamma_z * z + np.dot(x, b))
- d = M + np.random.standard_normal(size=[n_obs, ])
- y = np.dot(theta, d) + G + np.random.standard_normal(size=[n_obs, ])
+ d = M + np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ y = (
+ np.dot(theta, d)
+ + G
+ + np.random.standard_normal(
+ size=[
+ n_obs,
+ ]
+ )
+ )
if return_type in _array_alias:
return x, y, d, z
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((x, y, d, z)),
- columns=x_cols + ['y', 'd', 'z'])
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d", "z"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols, 'z')
+ return DoubleMLData(data, "y", "d", x_cols, "z")
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_pliv_CHS2015(n_obs, alpha=1., dim_x=200, dim_z=150, return_type='DoubleMLData'):
+def make_pliv_CHS2015(n_obs, alpha=1.0, dim_x=200, dim_z=150, return_type="DoubleMLData"):
"""
Generates data from a partially linear IV regression model used in Chernozhukov, Hansen and Spindler (2015).
The data generating process is defined as
@@ -513,26 +612,38 @@ def make_pliv_CHS2015(n_obs, alpha=1., dim_x=200, dim_z=150, return_type='Double
"""
assert dim_x >= dim_z
# see https://assets.aeaweb.org/asset-server/articles-attachments/aer/app/10505/P2015_1022_app.pdf
- xx = np.random.multivariate_normal(np.zeros(2),
- np.array([[1., 0.6], [0.6, 1.]]),
- size=[n_obs, ])
+ xx = np.random.multivariate_normal(
+ np.zeros(2),
+ np.array([[1.0, 0.6], [0.6, 1.0]]),
+ size=[
+ n_obs,
+ ],
+ )
epsilon = xx[:, 0]
u = xx[:, 1]
sigma = toeplitz([np.power(0.5, k) for k in range(0, dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x),
- sigma,
- size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ sigma,
+ size=[
+ n_obs,
+ ],
+ )
I_z = np.eye(dim_z)
- xi = np.random.multivariate_normal(np.zeros(dim_z),
- 0.25*I_z,
- size=[n_obs, ])
+ xi = np.random.multivariate_normal(
+ np.zeros(dim_z),
+ 0.25 * I_z,
+ size=[
+ n_obs,
+ ],
+ )
beta = [1 / (k**2) for k in range(1, dim_x + 1)]
gamma = beta
delta = [1 / (k**2) for k in range(1, dim_z + 1)]
- Pi = np.hstack((I_z, np.zeros((dim_z, dim_x-dim_z))))
+ Pi = np.hstack((I_z, np.zeros((dim_z, dim_x - dim_z))))
z = np.dot(x, np.transpose(Pi)) + xi
d = np.dot(x, gamma) + np.dot(z, delta) + u
@@ -541,19 +652,18 @@ def make_pliv_CHS2015(n_obs, alpha=1., dim_x=200, dim_z=150, return_type='Double
if return_type in _array_alias:
return x, y, d, z
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- z_cols = [f'Z{i + 1}' for i in np.arange(dim_z)]
- data = pd.DataFrame(np.column_stack((x, y, d, z)),
- columns=x_cols + ['y', 'd'] + z_cols)
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)]
+ data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols)
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', x_cols, z_cols)
+ return DoubleMLData(data, "y", "d", x_cols, z_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1., return_type='DoubleMLClusterData', **kwargs):
+def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1.0, return_type="DoubleMLClusterData", **kwargs):
"""
Generates data from a partially linear IV regression model with multiway cluster sample used in Chiang et al.
(2021). The data generating process is defined as
@@ -631,20 +741,20 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1., return_
    arXiv:`1909.03489 <https://arxiv.org/abs/1909.03489>`_.
"""
# additional parameters specifiable via kwargs
- pi_10 = kwargs.get('pi_10', 1.0)
+ pi_10 = kwargs.get("pi_10", 1.0)
xx = np.arange(1, dim_X + 1)
- zeta_0 = kwargs.get('zeta_0', np.power(0.5, xx))
- pi_20 = kwargs.get('pi_20', np.power(0.5, xx))
- xi_0 = kwargs.get('xi_0', np.power(0.5, xx))
+ zeta_0 = kwargs.get("zeta_0", np.power(0.5, xx))
+ pi_20 = kwargs.get("pi_20", np.power(0.5, xx))
+ xi_0 = kwargs.get("xi_0", np.power(0.5, xx))
- omega_X = kwargs.get('omega_X', np.array([0.25, 0.25]))
- omega_epsilon = kwargs.get('omega_epsilon', np.array([0.25, 0.25]))
- omega_v = kwargs.get('omega_v', np.array([0.25, 0.25]))
- omega_V = kwargs.get('omega_V', np.array([0.25, 0.25]))
+ omega_X = kwargs.get("omega_X", np.array([0.25, 0.25]))
+ omega_epsilon = kwargs.get("omega_epsilon", np.array([0.25, 0.25]))
+ omega_v = kwargs.get("omega_v", np.array([0.25, 0.25]))
+ omega_V = kwargs.get("omega_V", np.array([0.25, 0.25]))
- s_X = kwargs.get('s_X', 0.25)
- s_epsilon_v = kwargs.get('s_epsilon_v', 0.25)
+ s_X = kwargs.get("s_X", 0.25)
+ s_epsilon_v = kwargs.get("s_epsilon_v", 0.25)
# use np.tile() and np.repeat() for repeating vectors in different styles, i.e.,
    # np.tile([v1, v2, v3], 2) -> [v1, v2, v3, v1, v2, v3]
@@ -655,61 +765,98 @@ def make_pliv_multiway_cluster_CKMS2021(N=25, M=25, dim_X=100, theta=1., return_
alpha_V_j = np.tile(np.random.normal(size=M), N)
cov_mat = np.array([[1, s_epsilon_v], [s_epsilon_v, 1]])
- alpha_eps_v = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N * M, ])
+ alpha_eps_v = np.random.multivariate_normal(
+ np.zeros(2),
+ cov_mat,
+ size=[
+ N * M,
+ ],
+ )
alpha_eps = alpha_eps_v[:, 0]
alpha_v = alpha_eps_v[:, 1]
- alpha_eps_v_i = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[N, ])
+ alpha_eps_v_i = np.random.multivariate_normal(
+ np.zeros(2),
+ cov_mat,
+ size=[
+ N,
+ ],
+ )
alpha_eps_i = np.repeat(alpha_eps_v_i[:, 0], M)
alpha_v_i = np.repeat(alpha_eps_v_i[:, 1], M)
- alpha_eps_v_j = np.random.multivariate_normal(np.zeros(2), cov_mat, size=[M, ])
+ alpha_eps_v_j = np.random.multivariate_normal(
+ np.zeros(2),
+ cov_mat,
+ size=[
+ M,
+ ],
+ )
alpha_eps_j = np.tile(alpha_eps_v_j[:, 0], N)
alpha_v_j = np.tile(alpha_eps_v_j[:, 1], N)
cov_mat = toeplitz([np.power(s_X, k) for k in range(dim_X)])
- alpha_X = np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N * M, ])
- alpha_X_i = np.repeat(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[N, ]),
- M, axis=0)
- alpha_X_j = np.tile(np.random.multivariate_normal(np.zeros(dim_X), cov_mat, size=[M, ]),
- (N, 1))
+ alpha_X = np.random.multivariate_normal(
+ np.zeros(dim_X),
+ cov_mat,
+ size=[
+ N * M,
+ ],
+ )
+ alpha_X_i = np.repeat(
+ np.random.multivariate_normal(
+ np.zeros(dim_X),
+ cov_mat,
+ size=[
+ N,
+ ],
+ ),
+ M,
+ axis=0,
+ )
+ alpha_X_j = np.tile(
+ np.random.multivariate_normal(
+ np.zeros(dim_X),
+ cov_mat,
+ size=[
+ M,
+ ],
+ ),
+ (N, 1),
+ )
# generate variables
- x = (1 - omega_X[0] - omega_X[1]) * alpha_X \
- + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j
+ x = (1 - omega_X[0] - omega_X[1]) * alpha_X + omega_X[0] * alpha_X_i + omega_X[1] * alpha_X_j
- eps = (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps \
- + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j
+ eps = (
+ (1 - omega_epsilon[0] - omega_epsilon[1]) * alpha_eps + omega_epsilon[0] * alpha_eps_i + omega_epsilon[1] * alpha_eps_j
+ )
- v = (1 - omega_v[0] - omega_v[1]) * alpha_v \
- + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j
+ v = (1 - omega_v[0] - omega_v[1]) * alpha_v + omega_v[0] * alpha_v_i + omega_v[1] * alpha_v_j
- V = (1 - omega_V[0] - omega_V[1]) * alpha_V \
- + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j
+ V = (1 - omega_V[0] - omega_V[1]) * alpha_V + omega_V[0] * alpha_V_i + omega_V[1] * alpha_V_j
z = np.matmul(x, xi_0) + V
d = z * pi_10 + np.matmul(x, pi_20) + v
y = d * theta + np.matmul(x, zeta_0) + eps
- cluster_cols = ['cluster_var_i', 'cluster_var_j']
+ cluster_cols = ["cluster_var_i", "cluster_var_j"]
cluster_vars = pd.MultiIndex.from_product([range(N), range(M)]).to_frame(name=cluster_cols).reset_index(drop=True)
if return_type in _array_alias:
return x, y, d, cluster_vars.values, z
elif return_type in _data_frame_alias + _dml_cluster_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_X)]
- data = pd.concat((cluster_vars,
- pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ['Y', 'D', 'Z'])),
- axis=1)
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_X)]
+ data = pd.concat((cluster_vars, pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["Y", "D", "Z"])), axis=1)
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLClusterData(data, 'Y', 'D', cluster_cols, x_cols, 'Z')
+ return DoubleMLClusterData(data, "Y", "D", cluster_cols, x_cols, "Z")
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
-def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type='DoubleMLData', **kwargs):
+def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_type="DoubleMLData", **kwargs):
"""
Generates data from a difference-in-differences model used in Sant'Anna and Zhao (2020).
The data generating process is defined as follows. For a generic :math:`W=(W_1, W_2, W_3, W_4)^T`, let
@@ -795,26 +942,32 @@ def make_did_SZ2020(n_obs=500, dgp_type=1, cross_sectional_data=False, return_ty
Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122.
    doi:`10.1016/j.jeconom.2020.06.003 <https://doi.org/10.1016/j.jeconom.2020.06.003>`_.
"""
- xi = kwargs.get('xi', 0.75)
- c = kwargs.get('c', 0.0)
- lambda_t = kwargs.get('lambda_t', 0.5)
+ xi = kwargs.get("xi", 0.75)
+ c = kwargs.get("c", 0.0)
+ lambda_t = kwargs.get("lambda_t", 0.5)
def f_reg(w):
- res = 210 + 27.4*w[:, 0] + 13.7*(w[:, 1] + w[:, 2] + w[:, 3])
+ res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3])
return res
def f_ps(w, xi):
- res = xi*(-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3])
+ res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3])
return res
dim_x = 4
cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
- z_tilde_1 = np.exp(0.5*x[:, 0])
+ z_tilde_1 = np.exp(0.5 * x[:, 0])
z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0]))
- z_tilde_3 = (0.6 + x[:, 0]*x[:, 2]/25)**3
- z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2
+ z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3
+ z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2
z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4))
z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0)
@@ -842,7 +995,7 @@ def f_ps(w, xi):
features_ps = None
features_reg = x
else:
- raise ValueError('The dgp_type is not valid.')
+ raise ValueError("The dgp_type is not valid.")
# treatment and propensities
is_experimental = (dgp_type == 5) or (dgp_type == 6)
@@ -855,11 +1008,11 @@ def f_ps(w, xi):
d = 1.0 * (p >= u)
# potential outcomes
- nu = np.random.normal(loc=d*f_reg(features_reg), scale=1, size=n_obs)
+ nu = np.random.normal(loc=d * f_reg(features_reg), scale=1, size=n_obs)
y0 = f_reg(features_reg) + nu + epsilon_0
y1_d0 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 0]
y1_d1 = 2 * f_reg(features_reg) + nu + epsilon_1[:, 1]
- y1 = d * y1_d1 + (1-d) * y1_d0
+ y1 = d * y1_d1 + (1 - d) * y1_d0
if not cross_sectional_data:
y = y1 - y0
@@ -867,33 +1020,31 @@ def f_ps(w, xi):
if return_type in _array_alias:
return z, y, d
elif return_type in _data_frame_alias + _dml_data_alias:
- z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((z, y, d)),
- columns=z_cols + ['y', 'd'])
+ z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((z, y, d)), columns=z_cols + ["y", "d"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', z_cols)
+ return DoubleMLData(data, "y", "d", z_cols)
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
else:
u_t = np.random.uniform(low=0, high=1, size=n_obs)
t = 1.0 * (u_t <= lambda_t)
- y = t * y1 + (1-t)*y0
+ y = t * y1 + (1 - t) * y0
if return_type in _array_alias:
return z, y, d, t
elif return_type in _data_frame_alias + _dml_data_alias:
- z_cols = [f'Z{i + 1}' for i in np.arange(dim_x)]
- data = pd.DataFrame(np.column_stack((z, y, d, t)),
- columns=z_cols + ['y', 'd', 't'])
+ z_cols = [f"Z{i + 1}" for i in np.arange(dim_x)]
+ data = pd.DataFrame(np.column_stack((z, y, d, t)), columns=z_cols + ["y", "d", "t"])
if return_type in _data_frame_alias:
return data
else:
- return DoubleMLData(data, 'y', 'd', z_cols, t_col='t')
+ return DoubleMLData(data, "y", "d", z_cols, t_col="t")
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, linear=False, **kwargs):
@@ -1001,26 +1152,33 @@ def make_confounded_irm_data(n_obs=500, theta=0.0, gamma_a=0.127, beta_a=0.58, l
"""
c = 0.0 # the confounding strength is only valid for c=0
xi = 0.75
- dim_x = kwargs.get('dim_x', 5)
- trimming_threshold = kwargs.get('trimming_threshold', 0.01)
- var_eps_y = kwargs.get('var_eps_y', 1.0)
+ dim_x = kwargs.get("dim_x", 5)
+ trimming_threshold = kwargs.get("trimming_threshold", 0.01)
+ var_eps_y = kwargs.get("var_eps_y", 1.0)
# Specification of main regression function
def f_reg(w):
- res = 2.5 + 0.74*w[:, 0] + 0.25 * w[:, 1] + 0.137*(w[:, 2] + w[:, 3])
+ res = 2.5 + 0.74 * w[:, 0] + 0.25 * w[:, 1] + 0.137 * (w[:, 2] + w[:, 3])
return res
# Specification of prop score function
def f_ps(w, xi):
- res = xi*(-w[:, 0] + 0.1*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3])
+ res = xi * (-w[:, 0] + 0.1 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3])
return res
+
# observed covariates
cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
- z_tilde_1 = np.exp(0.5*x[:, 0])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
+ z_tilde_1 = np.exp(0.5 * x[:, 0])
z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0]))
- z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3
- z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2
+ z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3
+ z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2
z_tilde_5 = x[:, 4]
z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, z_tilde_5))
z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0)
@@ -1041,36 +1199,38 @@ def f_ps(w, xi):
p = np.exp(f_ps(features_ps, xi)) / (1 + np.exp(f_ps(features_ps, xi)))
# compute short and long form of propensity score
- m_long = p + gamma_a*a
+ m_long = p + gamma_a * a
m_short = p
# check propensity score bounds
if np.any(m_long < trimming_threshold) or np.any(m_long > 1.0 - trimming_threshold):
m_long = np.clip(m_long, trimming_threshold, 1.0 - trimming_threshold)
m_short = np.clip(m_short, trimming_threshold, 1.0 - trimming_threshold)
- warnings.warn(f'Propensity score is close to 0 or 1. '
- f'Trimming is at {trimming_threshold} and {1.0-trimming_threshold} is applied')
+ warnings.warn(
+ f"Propensity score is close to 0 or 1. "
+ f"Trimming is at {trimming_threshold} and {1.0 - trimming_threshold} is applied"
+ )
# generate treatment based on long form
u = np.random.uniform(low=0, high=1, size=n_obs)
d = 1.0 * (m_long >= u)
# add treatment heterogeneity
d1x = z[:, 4] + 1
- var_dx = np.var(d*(d1x))
+ var_dx = np.var(d * (d1x))
cov_adx = gamma_a * var_a
# Outcome regression
g_partial_reg = f_reg(features_reg)
# short model
g_short_d0 = g_partial_reg
g_short_d1 = (theta + beta_a * cov_adx / var_dx) * d1x + g_partial_reg
- g_short = d * g_short_d1 + (1.0-d) * g_short_d0
+ g_short = d * g_short_d1 + (1.0 - d) * g_short_d0
# long model
g_long_d0 = g_partial_reg + beta_a * a
g_long_d1 = theta * d1x + g_partial_reg + beta_a * a
- g_long = d * g_long_d1 + (1.0-d) * g_long_d0
+ g_long = d * g_long_d1 + (1.0 - d) * g_long_d0
# Potential outcomes
y_0 = g_long_d0 + eps_y
y_1 = g_long_d1 + eps_y
# Realized outcome
- y = d * y_1 + (1.0-d) * y_0
+ y = d * y_1 + (1.0 - d) * y_0
# In-sample values for confounding strength
explained_residual_variance = np.square(g_long - g_short)
residual_variance = np.square(y - g_short)
@@ -1085,7 +1245,9 @@ def f_ps(w, xi):
propensity_ratio_short = m_short / (1.0 - m_short)
rr_short_ate = d / m_short - (1.0 - d) / (1.0 - m_short)
rr_short_atte = treated_weight - np.multiply(untreated_weight, propensity_ratio_short)
- cf_d_ate = (np.mean(1/(m_long * (1 - m_long))) - np.mean(1/(m_short * (1 - m_short)))) / np.mean(1/(m_long * (1 - m_long)))
+ cf_d_ate = (np.mean(1 / (m_long * (1 - m_long))) - np.mean(1 / (m_short * (1 - m_short)))) / np.mean(
+ 1 / (m_long * (1 - m_long))
+ )
cf_d_atte = (np.mean(propensity_ratio_long) - np.mean(propensity_ratio_short)) / np.mean(propensity_ratio_long)
if (beta_a == 0) | (gamma_a == 0):
rho_ate = 0.0
@@ -1094,28 +1256,23 @@ def f_ps(w, xi):
rho_ate = np.corrcoef((g_long - g_short), (rr_long_ate - rr_short_ate))[0, 1]
rho_atte = np.corrcoef((g_long - g_short), (rr_long_atte - rr_short_atte))[0, 1]
oracle_values = {
- 'g_long': g_long,
- 'g_short': g_short,
- 'm_long': m_long,
- 'm_short': m_short,
- 'gamma_a': gamma_a,
- 'beta_a': beta_a,
- 'a': a,
- 'y_0': y_0,
- 'y_1': y_1,
- 'z': z,
- 'cf_y': cf_y,
- 'cf_d_ate': cf_d_ate,
- 'cf_d_atte': cf_d_atte,
- 'rho_ate': rho_ate,
- 'rho_atte': rho_atte,
- }
- res_dict = {
- 'x': x,
- 'y': y,
- 'd': d,
- 'oracle_values': oracle_values
+ "g_long": g_long,
+ "g_short": g_short,
+ "m_long": m_long,
+ "m_short": m_short,
+ "gamma_a": gamma_a,
+ "beta_a": beta_a,
+ "a": a,
+ "y_0": y_0,
+ "y_1": y_1,
+ "z": z,
+ "cf_y": cf_y,
+ "cf_d_ate": cf_d_ate,
+ "cf_d_atte": cf_d_atte,
+ "rho_ate": rho_ate,
+ "rho_atte": rho_atte,
}
+ res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values}
return res_dict
@@ -1208,17 +1365,23 @@ def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwarg
Doubly robust difference-in-differences estimators. Journal of Econometrics, 219(1), 101-122.
    doi:`10.1016/j.jeconom.2020.06.003 <https://doi.org/10.1016/j.jeconom.2020.06.003>`_.
"""
- c = kwargs.get('c', 0.0)
- dim_x = kwargs.get('dim_x', 4)
+ c = kwargs.get("c", 0.0)
+ dim_x = kwargs.get("dim_x", 4)
# observed covariates
cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
- z_tilde_1 = np.exp(0.5*x[:, 0])
+ z_tilde_1 = np.exp(0.5 * x[:, 0])
z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0]))
- z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3
- z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2
+ z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3
+ z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2
z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:]))
z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0)
@@ -1235,7 +1398,7 @@ def make_confounded_plr_data(n_obs=500, theta=5.0, cf_y=0.04, cf_d=0.04, **kwarg
var_a = np.square(a_bounds[1] - a_bounds[0]) / 12
# get the required impact of the confounder on the propensity score
- m_short = -z[:, 0] + 0.5*z[:, 1] - 0.25*z[:, 2] - 0.1*z[:, 3]
+ m_short = -z[:, 0] + 0.5 * z[:, 1] - 0.25 * z[:, 2] - 0.1 * z[:, 3]
def f_m(gamma_a):
rr_long = eps_d / var_eps_d
@@ -1244,40 +1407,39 @@ def f_m(gamma_a):
return np.square(C2_D / (1 + C2_D) - cf_d)
gamma_a = minimize_scalar(f_m).x
- m_long = m_short + gamma_a*a
+ m_long = m_short + gamma_a * a
d = m_long + eps_d
# short and long version of g
- g_partial_reg = 210 + 27.4*z[:, 0] + 13.7*(z[:, 1] + z[:, 2] + z[:, 3])
+ g_partial_reg = 210 + 27.4 * z[:, 0] + 13.7 * (z[:, 1] + z[:, 2] + z[:, 3])
var_d = np.var(d)
def f_g(beta_a):
- g_diff = beta_a * (a - gamma_a * (var_a/var_d) * d)
+ g_diff = beta_a * (a - gamma_a * (var_a / var_d) * d)
y_diff = eps_y + g_diff
return np.square(np.mean(np.square(g_diff)) / np.mean(np.square(y_diff)) - cf_y)
beta_a = minimize_scalar(f_g).x
- g_long = theta*d + g_partial_reg + beta_a*a
- g_short = (theta + gamma_a*beta_a * var_a / var_d)*d + g_partial_reg
+ g_long = theta * d + g_partial_reg + beta_a * a
+ g_short = (theta + gamma_a * beta_a * var_a / var_d) * d + g_partial_reg
y = g_long + eps_y
- oracle_values = {'g_long': g_long,
- 'g_short': g_short,
- 'm_long': m_long,
- 'm_short': m_short,
- 'theta': theta,
- 'gamma_a': gamma_a,
- 'beta_a': beta_a,
- 'a': a,
- 'z': z}
-
- res_dict = {'x': x,
- 'y': y,
- 'd': d,
- 'oracle_values': oracle_values}
+ oracle_values = {
+ "g_long": g_long,
+ "g_short": g_short,
+ "m_long": m_long,
+ "m_short": m_short,
+ "theta": theta,
+ "gamma_a": gamma_a,
+ "beta_a": beta_a,
+ "a": a,
+ "z": z,
+ }
+
+ res_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values}
return res_dict
@@ -1346,14 +1508,16 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat
"""
# simple input checks
- assert n_x in [1, 2], 'n_x must be either 1 or 2.'
- assert support_size <= p, 'support_size must be smaller than p.'
- assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.'
+ assert n_x in [1, 2], "n_x must be either 1 or 2."
+ assert support_size <= p, "support_size must be smaller than p."
+ assert isinstance(binary_treatment, bool), "binary_treatment must be a boolean."
# define treatment effects
if n_x == 1:
+
def treatment_effect(x):
return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0])
+
else:
assert n_x == 2
@@ -1383,23 +1547,16 @@ def treatment_effect(x):
y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon
# Now we build the dataset
- y_df = pd.DataFrame({'y': y})
- d_df = pd.DataFrame({'d': d})
- x_df = pd.DataFrame(
- data=x,
- index=np.arange(x.shape[0]),
- columns=[f'X_{i}' for i in range(x.shape[1])]
- )
+ y_df = pd.DataFrame({"y": y})
+ d_df = pd.DataFrame({"d": d})
+ x_df = pd.DataFrame(data=x, index=np.arange(x.shape[0]), columns=[f"X_{i}" for i in range(x.shape[1])])
data = pd.concat([y_df, d_df, x_df], axis=1)
- res_dict = {
- 'data': data,
- 'effects': te,
- 'treatment_effect': treatment_effect}
+ res_dict = {"data": data, "effects": te, "treatment_effect": treatment_effect}
return res_dict
-def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleMLData'):
+def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type="DoubleMLData"):
"""
Generates data from a sample selection model (SSM).
The data generating process is defined as
@@ -1456,7 +1613,13 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM
e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T
cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
beta = [0.4 / (k**2) for k in range(1, dim_x + 1)]
@@ -1470,21 +1633,19 @@ def make_ssm_data(n_obs=8000, dim_x=100, theta=1, mar=True, return_type='DoubleM
if return_type in _array_alias:
return x, y, d, z, s
elif return_type in _data_frame_alias + _dml_data_alias:
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
if mar:
- data = pd.DataFrame(np.column_stack((x, y, d, s)),
- columns=x_cols + ['y', 'd', 's'])
+ data = pd.DataFrame(np.column_stack((x, y, d, s)), columns=x_cols + ["y", "d", "s"])
else:
- data = pd.DataFrame(np.column_stack((x, y, d, z, s)),
- columns=x_cols + ['y', 'd', 'z', 's'])
+ data = pd.DataFrame(np.column_stack((x, y, d, z, s)), columns=x_cols + ["y", "d", "z", "s"])
if return_type in _data_frame_alias:
return data
else:
if mar:
- return DoubleMLData(data, 'y', 'd', x_cols, None, None, 's')
- return DoubleMLData(data, 'y', 'd', x_cols, 'z', None, 's')
+ return DoubleMLData(data, "y", "d", x_cols, None, None, "s")
+ return DoubleMLData(data, "y", "d", x_cols, "z", None, "s")
else:
- raise ValueError('Invalid return_type.')
+ raise ValueError("Invalid return_type.")
def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, random_state=None, **kwargs):
@@ -1573,25 +1734,31 @@ def make_irm_data_discrete_treatments(n_obs=200, n_levels=3, linear=False, rando
"""
if random_state is not None:
np.random.seed(random_state)
- xi = kwargs.get('xi', 0.3)
- c = kwargs.get('c', 0.0)
- dim_x = kwargs.get('dim_x', 5)
+ xi = kwargs.get("xi", 0.3)
+ c = kwargs.get("c", 0.0)
+ dim_x = kwargs.get("dim_x", 5)
if not isinstance(n_levels, int):
- raise ValueError('n_levels must be an integer.')
+ raise ValueError("n_levels must be an integer.")
if n_levels < 2:
- raise ValueError('n_levels must be at least 2.')
+ raise ValueError("n_levels must be at least 2.")
# observed covariates
cov_mat = toeplitz([np.power(c, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
def f_reg(w):
- res = 210 + 27.4*w[:, 0] + 13.7*(w[:, 1] + w[:, 2] + w[:, 3])
+ res = 210 + 27.4 * w[:, 0] + 13.7 * (w[:, 1] + w[:, 2] + w[:, 3])
return res
def f_treatment(w, xi):
- res = xi * (-w[:, 0] + 0.5*w[:, 1] - 0.25*w[:, 2] - 0.1*w[:, 3])
+ res = xi * (-w[:, 0] + 0.5 * w[:, 1] - 0.25 * w[:, 2] - 0.1 * w[:, 3])
return res
def treatment_effect(d, scale=15):
@@ -1599,8 +1766,8 @@ def treatment_effect(d, scale=15):
z_tilde_1 = np.exp(0.5 * x[:, 0])
z_tilde_2 = 10 + x[:, 1] / (1 + np.exp(x[:, 0]))
- z_tilde_3 = (0.6 + x[:, 0] * x[:, 2]/25)**3
- z_tilde_4 = (20 + x[:, 1] + x[:, 3])**2
+ z_tilde_3 = (0.6 + x[:, 0] * x[:, 2] / 25) ** 3
+ z_tilde_4 = (20 + x[:, 1] + x[:, 3]) ** 2
z_tilde = np.column_stack((z_tilde_1, z_tilde_2, z_tilde_3, z_tilde_4, x[:, 4:]))
z = (z_tilde - np.mean(z_tilde, axis=0)) / np.std(z_tilde, axis=0)
@@ -1623,7 +1790,7 @@ def treatment_effect(d, scale=15):
level_bounds = np.quantile(cont_d, q=np.linspace(0, 1, n_levels + 1))
potential_level = sum([1.0 * (cont_d >= bound) for bound in level_bounds[1:-1]]) + 1
eta = np.random.uniform(0, 1, size=n_obs)
- d = 1.0 * (eta >= 1/n_levels) * potential_level
+ d = 1.0 * (eta >= 1 / n_levels) * potential_level
ite = treatment_effect(cont_d)
y0 = g + eps_y
@@ -1631,18 +1798,13 @@ def treatment_effect(d, scale=15):
y = ite * (d > 0) + y0
oracle_values = {
- 'cont_d': cont_d,
- 'level_bounds': level_bounds,
- 'potential_level': potential_level,
- 'ite': ite,
- 'y0': y0,
+ "cont_d": cont_d,
+ "level_bounds": level_bounds,
+ "potential_level": potential_level,
+ "ite": ite,
+ "y0": y0,
}
- resul_dict = {
- 'x': x,
- 'y': y,
- 'd': d,
- 'oracle_values': oracle_values
- }
+ resul_dict = {"x": x, "y": y, "d": d, "oracle_values": oracle_values}
return resul_dict
diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index c62c8e329..495f63c8a 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -1,14 +1,14 @@
+import warnings
+
import numpy as np
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import type_of_target
-import warnings
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm
-from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
+from ..utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _trimm
class DoubleMLDID(LinearScoreMixin, DoubleML):
@@ -77,31 +77,32 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
coef std err t P>|t| 2.5 % 97.5 %
d -2.685104 1.798071 -1.493325 0.135352 -6.209257 0.83905
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m=None,
- n_folds=5,
- n_rep=1,
- score='observational',
- in_sample_normalization=True,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m=None,
+ n_folds=5,
+ n_rep=1,
+ score="observational",
+ in_sample_normalization=True,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
- valid_scores = ['observational', 'experimental']
+ valid_scores = ["observational", "experimental"]
_check_score(self.score, valid_scores, allow_callable=False)
self._in_sample_normalization = in_sample_normalization
if not isinstance(self.in_sample_normalization, bool):
- raise TypeError('in_sample_normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.in_sample_normalization))} passed.')
+ raise TypeError(
+ "in_sample_normalization indicator has to be boolean. "
+ + f"Object of type {str(type(self.in_sample_normalization))} passed."
+ )
        # set stratification for resampling
self._strata = self._dml_data.d
@@ -109,28 +110,34 @@ def __init__(self,
self.draw_sample_splitting()
# check learners
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- if self.score == 'observational':
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ if self.score == "observational":
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": ml_g, "ml_m": ml_m}
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
if ml_m is not None:
- warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. '
- 'A learner ml_m is not required for estimation.'))
- self._learner = {'ml_g': ml_g}
+ warnings.warn(
+ (
+ 'A learner ml_m has been provided for score = "experimental" but will be ignored. '
+ "A learner ml_m is not required for estimation."
+ )
+ )
+ self._learner = {"ml_g": ml_g}
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict'}
+ self._predict_method = {"ml_g": "predict"}
- if 'ml_m' in self._learner:
- self._predict_method['ml_m'] = 'predict_proba'
+ if "ml_m" in self._learner:
+ self._predict_method["ml_m"] = "predict_proba"
self._initialize_ml_nuisance_params()
self._trimming_rule = trimming_rule
@@ -161,109 +168,118 @@ def trimming_threshold(self):
return self._trimming_threshold
def _initialize_ml_nuisance_params(self):
- if self.score == 'observational':
- valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
+ if self.score == "observational":
+ valid_learner = ["ml_g0", "ml_g1", "ml_m"]
else:
- assert self.score == 'experimental'
- valid_learner = ['ml_g0', 'ml_g1']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in valid_learner}
+ assert self.score == "experimental"
+ valid_learner = ["ml_g0", "ml_g1"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('For repeated outcomes the data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ "For repeated outcomes the data must be of DoubleMLData type. "
+ f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'At the moment there are not DiD models with instruments implemented.')
- one_treat = (obj_dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "At the moment there are not DiD models with instruments implemented."
+ )
+ one_treat = obj_dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml_data.d) == "binary"
zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- 'To fit an DID model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an DID model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# nuisance g
# get train indices for d == 0
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
# nuisance g for d==0
- if external_predictions['ml_g0'] is not None:
- g_hat0 = {'preds': external_predictions['ml_g0'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g0"] is not None:
+ g_hat0 = {"preds": external_predictions["ml_g0"], "targets": None, "models": None}
else:
- g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
- return_models=return_models)
-
- _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+
+ _check_finite_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat0['targets'] = g_hat0['targets'].astype(float)
- g_hat0['targets'][d == 1] = np.nan
+ g_hat0["targets"] = g_hat0["targets"].astype(float)
+ g_hat0["targets"][d == 1] = np.nan
# nuisance g for d==1
- if external_predictions['ml_g1'] is not None:
- g_hat1 = {'preds': external_predictions['ml_g1'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g1"] is not None:
+ g_hat1 = {"preds": external_predictions["ml_g1"], "targets": None, "models": None}
else:
- g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
- return_models=return_models)
-
- _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+
+ _check_finite_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat1['targets'] = g_hat1['targets'].astype(float)
- g_hat1['targets'][d == 0] = np.nan
+ g_hat1["targets"] = g_hat1["targets"].astype(float)
+ g_hat1["targets"][d == 0] = np.nan
# only relevant for observational setting
- m_hat = {'preds': None, 'targets': None, 'models': None}
- if self.score == 'observational':
+ m_hat = {"preds": None, "targets": None, "models": None}
+ if self.score == "observational":
# nuisance m
- if external_predictions['ml_m'] is not None:
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_m"] is not None:
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
# nuisance estimates of the uncond. treatment prob.
- p_hat = np.full_like(d, np.nan, dtype='float64')
+ p_hat = np.full_like(d, np.nan, dtype="float64")
for train_index, test_index in smpls:
p_hat[test_index] = np.mean(d[train_index])
- psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], p_hat)
-
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_g0': g_hat0['preds'],
- 'ml_g1': g_hat1['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g0': g_hat0['targets'],
- 'ml_g1': g_hat1['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g0': g_hat0['models'],
- 'ml_g1': g_hat1['models'],
- 'ml_m': m_hat['models']
- }
- }
+ psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], p_hat)
+
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_g0": g_hat0["preds"], "ml_g1": g_hat1["preds"], "ml_m": m_hat["preds"]},
+ "targets": {"ml_g0": g_hat0["targets"], "ml_g1": g_hat1["targets"], "ml_m": m_hat["targets"]},
+ "models": {"ml_g0": g_hat0["models"], "ml_g1": g_hat1["models"], "ml_m": m_hat["models"]},
+ }
return psi_elements, preds
@@ -271,35 +287,35 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat):
# calc residuals
resid_d0 = y - g_hat0
- if self.score == 'observational':
+ if self.score == "observational":
if self.in_sample_normalization:
weight_psi_a = np.divide(d, np.mean(d))
- propensity_weight = np.multiply(1.0-d, np.divide(m_hat, 1.0-m_hat))
+ propensity_weight = np.multiply(1.0 - d, np.divide(m_hat, 1.0 - m_hat))
weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(propensity_weight, np.mean(propensity_weight))
else:
weight_psi_a = np.divide(d, p_hat)
- weight_resid_d0 = np.divide(d-m_hat, np.multiply(p_hat, 1.0-m_hat))
+ weight_resid_d0 = np.divide(d - m_hat, np.multiply(p_hat, 1.0 - m_hat))
psi_b_1 = np.zeros_like(y)
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
if self.in_sample_normalization:
weight_psi_a = np.ones_like(y)
weight_g0 = np.divide(d, np.mean(d)) - 1.0
weight_g1 = 1.0 - np.divide(d, np.mean(d))
- weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(1.0-d, np.mean(1.0-d))
+ weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(1.0 - d, np.mean(1.0 - d))
else:
weight_psi_a = np.ones_like(y)
weight_g0 = np.divide(d, p_hat) - 1.0
weight_g1 = 1.0 - np.divide(d, p_hat)
- weight_resid_d0 = np.divide(d-p_hat, np.multiply(p_hat, 1.0-p_hat))
+ weight_resid_d0 = np.divide(d - p_hat, np.multiply(p_hat, 1.0 - p_hat))
- psi_b_1 = np.multiply(weight_g0, g_hat0) + np.multiply(weight_g1, g_hat1)
+ psi_b_1 = np.multiply(weight_g0, g_hat0) + np.multiply(weight_g1, g_hat1)
# set score elements
psi_a = -1.0 * weight_psi_a
- psi_b = psi_b_1 + np.multiply(weight_resid_d0, resid_d0)
+ psi_b = psi_b_1 + np.multiply(weight_resid_d0, resid_d0)
return psi_a, psi_b
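
Read off the weights assembled above, the observational score without in-sample normalization is the familiar linear DiD score $\psi(W; \theta, \eta) = \psi_a \theta + \psi_b$ with

$$
\psi_a = -\frac{D}{\hat p}, \qquad
\psi_b = \frac{D - \hat m(X)}{\hat p\,(1 - \hat m(X))}\,\bigl(Y - \hat g_0(X)\bigr),
$$

where $\hat p$ is the unconditional treatment probability, $\hat m$ the propensity score and $\hat g_0$ the outcome regression on the untreated. With `in_sample_normalization=True` the $1/\hat p$ factors are replaced by the corresponding sample means, and the experimental score instead adds the regression-adjustment term `psi_b_1` built from both $\hat g_0$ and $\hat g_1$.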
@@ -307,92 +323,112 @@ def _sensitivity_element_est(self, preds):
y = self._dml_data.y
d = self._dml_data.d
- m_hat = preds['predictions']['ml_m']
- g_hat0 = preds['predictions']['ml_g0']
- g_hat1 = preds['predictions']['ml_g1']
+ m_hat = preds["predictions"]["ml_m"]
+ g_hat0 = preds["predictions"]["ml_g0"]
+ g_hat1 = preds["predictions"]["ml_g1"]
- g_hat = np.multiply(d, g_hat1) + np.multiply(1.0-d, g_hat0)
+ g_hat = np.multiply(d, g_hat1) + np.multiply(1.0 - d, g_hat0)
sigma2_score_element = np.square(y - g_hat)
sigma2 = np.mean(sigma2_score_element)
psi_sigma2 = sigma2_score_element - sigma2
# calc m(W,alpha) and Riesz representer
p_hat = np.mean(d)
- if self.score == 'observational':
- propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
+ if self.score == "observational":
+ propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat)
if self.in_sample_normalization:
- weight_d0 = np.multiply(1.0-d, propensity_weight_d0)
+ weight_d0 = np.multiply(1.0 - d, propensity_weight_d0)
mean_weight_d0 = np.mean(weight_d0)
- m_alpha = np.multiply(np.divide(d, p_hat),
- np.divide(1.0, p_hat) + np.divide(propensity_weight_d0, mean_weight_d0))
+ m_alpha = np.multiply(
+ np.divide(d, p_hat), np.divide(1.0, p_hat) + np.divide(propensity_weight_d0, mean_weight_d0)
+ )
rr = np.divide(d, p_hat) - np.divide(weight_d0, mean_weight_d0)
else:
m_alpha = np.multiply(np.divide(d, np.square(p_hat)), (1.0 + propensity_weight_d0))
- rr = np.divide(d, p_hat) - np.multiply(np.divide(1.0-d, p_hat), propensity_weight_d0)
+ rr = np.divide(d, p_hat) - np.multiply(np.divide(1.0 - d, p_hat), propensity_weight_d0)
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
# the same with or without self-normalization
- m_alpha = np.divide(1.0, p_hat) + np.divide(1.0, 1.0-p_hat)
- rr = np.divide(d, p_hat) - np.divide(1.0-d, 1.0-p_hat)
+ m_alpha = np.divide(1.0, p_hat) + np.divide(1.0, 1.0 - p_hat)
+ rr = np.divide(d, p_hat) - np.divide(1.0 - d, 1.0 - p_hat)
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2 = np.mean(nu2_score_element)
psi_nu2 = nu2_score_element - nu2
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2,
- 'riesz_rep': rr,
- }
+ element_dict = {
+ "sigma2": sigma2,
+ "nu2": nu2,
+ "psi_sigma2": psi_sigma2,
+ "psi_nu2": psi_nu2,
+ "riesz_rep": rr,
+ }
return element_dict
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_m": None}
train_inds = [train_index for (train_index, _) in smpls]
train_inds_d0 = [train_index for (train_index, _) in smpls_d0]
train_inds_d1 = [train_index for (train_index, _) in smpls_d1]
- g0_tune_res = _dml_tune(y, x, train_inds_d0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- g1_tune_res = _dml_tune(y, x, train_inds_d1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ g1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g0_best_params = [xx.best_params_ for xx in g0_tune_res]
g1_best_params = [xx.best_params_ for xx in g1_tune_res]
m_tune_res = list()
- if self.score == 'observational':
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ if self.score == "observational":
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g0': g0_best_params,
- 'ml_g1': g1_best_params,
- 'ml_m': m_best_params}
- tune_res = {'g0_tune': g0_tune_res,
- 'g1_tune': g1_tune_res,
- 'm_tune': m_tune_res}
+ params = {"ml_g0": g0_best_params, "ml_g1": g1_best_params, "ml_m": m_best_params}
+ tune_res = {"g0_tune": g0_tune_res, "g1_tune": g1_tune_res, "m_tune": m_tune_res}
else:
- params = {'ml_g0': g0_best_params,
- 'ml_g1': g1_best_params}
- tune_res = {'g0_tune': g0_tune_res,
- 'g1_tune': g1_tune_res}
+ params = {"ml_g0": g0_best_params, "ml_g1": g1_best_params}
+ tune_res = {"g0_tune": g0_tune_res, "g1_tune": g1_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
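
To illustrate the reformatted constructor end to end, a hedged sketch; the top-level import and the `make_did_SZ2020` helper follow the package's documented layout and are assumptions, not part of this diff:

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from doubleml import DoubleMLDID
from doubleml.datasets import make_did_SZ2020  # panel-data DiD DGP (assumed helper)

np.random.seed(42)
dml_data = make_did_SZ2020(n_obs=500)

# score="observational" requires a propensity learner ml_m (a classifier);
# for score="experimental" it would be ignored with a warning, as coded above.
dml_did = DoubleMLDID(
    dml_data,
    ml_g=RandomForestRegressor(n_estimators=100),
    ml_m=RandomForestClassifier(n_estimators=100),
    score="observational",
    in_sample_normalization=True,
    n_folds=5,
)
dml_did.fit()
print(dml_did.summary)  # coef, std err, t, P>|t| and confidence bounds for d
```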
diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index b1cd3d4f5..2b7b5b12c 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -1,14 +1,14 @@
+import warnings
+
import numpy as np
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import type_of_target
-import warnings
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _trimm, _get_cond_smpls_2d, _dml_tune
-from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
+from ..utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _trimm
class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
@@ -77,31 +77,32 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
coef std err t P>|t| 2.5 % 97.5 %
d -6.604603 8.725802 -0.756905 0.449107 -23.706862 10.497655
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m=None,
- n_folds=5,
- n_rep=1,
- score='observational',
- in_sample_normalization=True,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m=None,
+ n_folds=5,
+ n_rep=1,
+ score="observational",
+ in_sample_normalization=True,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
- valid_scores = ['observational', 'experimental']
+ valid_scores = ["observational", "experimental"]
_check_score(self.score, valid_scores, allow_callable=False)
self._in_sample_normalization = in_sample_normalization
if not isinstance(self.in_sample_normalization, bool):
- raise TypeError('in_sample_normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.in_sample_normalization))} passed.')
+ raise TypeError(
+ "in_sample_normalization indicator has to be boolean. "
+ + f"Object of type {str(type(self.in_sample_normalization))} passed."
+ )
        # set stratification for resampling
self._strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.t.reshape(-1, 1)
@@ -109,28 +110,34 @@ def __init__(self,
self.draw_sample_splitting()
# check learners
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- if self.score == 'observational':
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ if self.score == "observational":
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": ml_g, "ml_m": ml_m}
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
if ml_m is not None:
- warnings.warn(('A learner ml_m has been provided for score = "experimental" but will be ignored. '
- 'A learner ml_m is not required for estimation.'))
- self._learner = {'ml_g': ml_g}
+ warnings.warn(
+ (
+ 'A learner ml_m has been provided for score = "experimental" but will be ignored. '
+ "A learner ml_m is not required for estimation."
+ )
+ )
+ self._learner = {"ml_g": ml_g}
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict'}
+ self._predict_method = {"ml_g": "predict"}
- if 'ml_m' in self._learner:
- self._predict_method['ml_m'] = 'predict_proba'
+ if "ml_m" in self._learner:
+ self._predict_method["ml_m"] = "predict_proba"
self._initialize_ml_nuisance_params()
self._trimming_rule = trimming_rule
@@ -162,157 +169,190 @@ def trimming_threshold(self):
return self._trimming_threshold
def _initialize_ml_nuisance_params(self):
- if self.score == 'observational':
- valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1',
- 'ml_g_d1_t0', 'ml_g_d1_t1', 'ml_m']
+ if self.score == "observational":
+ valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"]
else:
- assert self.score == 'experimental'
- valid_learner = ['ml_g_d0_t0', 'ml_g_d0_t1',
- 'ml_g_d1_t0', 'ml_g_d1_t1']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in valid_learner}
+ assert self.score == "experimental"
+ valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('For repeated cross sections the data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ "For repeated cross sections the data must be of DoubleMLData type. "
+ f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'At the moment there are no DiD models with instruments implemented.')
- one_treat = (obj_dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
- zero_one_treat = np.all(
- (np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "At the moment there are no DiD models with instruments implemented."
+ )
+ one_treat = obj_dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml_data.d) == "binary"
+ zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- 'To fit an DIDCS model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an DIDCS model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
- binary_time = (type_of_target(obj_dml_data.t) == 'binary')
- zero_one_time = np.all(
- (np.power(obj_dml_data.t, 2) - obj_dml_data.t) == 0)
+ binary_time = type_of_target(obj_dml_data.t) == "binary"
+ zero_one_time = np.all((np.power(obj_dml_data.t, 2) - obj_dml_data.t) == 0)
if not (binary_time & zero_one_time):
- raise ValueError('Incompatible data. '
- 'To fit an DIDCS model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as time variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an DIDCS model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as time variable."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
- x, t = check_X_y(x, self._dml_data.t,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+ x, t = check_X_y(x, self._dml_data.t, force_all_finite=False)
        # THIS DIFFERS FROM THE PAPER: due to stratified splitting, this should be the same for each fold
# nuisance estimates of the uncond. treatment prob.
- p_hat = np.full_like(d, np.nan, dtype='float64')
+ p_hat = np.full_like(d, np.nan, dtype="float64")
for train_index, test_index in smpls:
p_hat[test_index] = np.mean(d[train_index])
# nuisance estimates of the uncond. time prob.
- lambda_hat = np.full_like(t, np.nan, dtype='float64')
+ lambda_hat = np.full_like(t, np.nan, dtype="float64")
for train_index, test_index in smpls:
lambda_hat[test_index] = np.mean(t[train_index])
# nuisance g
smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t)
- if external_predictions['ml_g_d0_t0'] is not None:
- g_hat_d0_t0 = {'preds': external_predictions['ml_g_d0_t0'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g_d0_t0"] is not None:
+ g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": None, "models": None}
else:
- g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'],
- return_models=return_models)
-
- g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float)
- g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan
- if external_predictions['ml_g_d0_t1'] is not None:
- g_hat_d0_t1 = {'preds': external_predictions['ml_g_d0_t1'],
- 'targets': None,
- 'models': None}
+ g_hat_d0_t0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls_d0_t0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d0_t0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+
+ g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float)
+ g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan
+ if external_predictions["ml_g_d0_t1"] is not None:
+ g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": None, "models": None}
else:
- g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float)
- g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan
- if external_predictions['ml_g_d1_t0'] is not None:
- g_hat_d1_t0 = {'preds': external_predictions['ml_g_d1_t0'],
- 'targets': None,
- 'models': None}
+ g_hat_d0_t1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls_d0_t1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d0_t1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float)
+ g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan
+ if external_predictions["ml_g_d1_t0"] is not None:
+ g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": None, "models": None}
else:
- g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'],
- return_models=return_models)
- g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float)
- g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan
- if external_predictions['ml_g_d1_t1'] is not None:
- g_hat_d1_t1 = {'preds': external_predictions['ml_g_d1_t1'],
- 'targets': None,
- 'models': None}
+ g_hat_d1_t0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls_d1_t0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d1_t0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float)
+ g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan
+ if external_predictions["ml_g_d1_t1"] is not None:
+ g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": None, "models": None}
else:
- g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float)
- g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan
+ g_hat_d1_t1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls_d1_t1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d1_t1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ g_hat_d1_t1["targets"] = g_hat_d1_t1["targets"].astype(float)
+ g_hat_d1_t1["targets"][np.invert((d == 1) & (t == 1))] = np.nan
        # only relevant for observational setting
- m_hat = {'preds': None, 'targets': None, 'models': None}
- if self.score == 'observational':
+ m_hat = {"preds": None, "targets": None, "models": None}
+ if self.score == "observational":
# nuisance m
- if external_predictions['ml_m'] is not None:
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_m"] is not None:
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
-
- psi_a, psi_b = self._score_elements(y, d, t,
- g_hat_d0_t0['preds'], g_hat_d0_t1['preds'],
- g_hat_d1_t0['preds'], g_hat_d1_t1['preds'],
- m_hat['preds'], p_hat, lambda_hat)
-
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_g_d0_t0': g_hat_d0_t0['preds'],
- 'ml_g_d0_t1': g_hat_d0_t1['preds'],
- 'ml_g_d1_t0': g_hat_d1_t0['preds'],
- 'ml_g_d1_t1': g_hat_d1_t1['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g_d0_t0': g_hat_d0_t0['targets'],
- 'ml_g_d0_t1': g_hat_d0_t1['targets'],
- 'ml_g_d1_t0': g_hat_d1_t0['targets'],
- 'ml_g_d1_t1': g_hat_d1_t1['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g_d0_t0': g_hat_d0_t0['models'],
- 'ml_g_d0_t1': g_hat_d0_t1['models'],
- 'ml_g_d1_t0': g_hat_d1_t0['models'],
- 'ml_g_d1_t1': g_hat_d1_t1['models'],
- 'ml_m': m_hat['models']}
- }
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+ psi_a, psi_b = self._score_elements(
+ y,
+ d,
+ t,
+ g_hat_d0_t0["preds"],
+ g_hat_d0_t1["preds"],
+ g_hat_d1_t0["preds"],
+ g_hat_d1_t1["preds"],
+ m_hat["preds"],
+ p_hat,
+ lambda_hat,
+ )
+
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {
+ "ml_g_d0_t0": g_hat_d0_t0["preds"],
+ "ml_g_d0_t1": g_hat_d0_t1["preds"],
+ "ml_g_d1_t0": g_hat_d1_t0["preds"],
+ "ml_g_d1_t1": g_hat_d1_t1["preds"],
+ "ml_m": m_hat["preds"],
+ },
+ "targets": {
+ "ml_g_d0_t0": g_hat_d0_t0["targets"],
+ "ml_g_d0_t1": g_hat_d0_t1["targets"],
+ "ml_g_d1_t0": g_hat_d1_t0["targets"],
+ "ml_g_d1_t1": g_hat_d1_t1["targets"],
+ "ml_m": m_hat["targets"],
+ },
+ "models": {
+ "ml_g_d0_t0": g_hat_d0_t0["models"],
+ "ml_g_d0_t1": g_hat_d0_t1["models"],
+ "ml_g_d1_t0": g_hat_d1_t0["models"],
+ "ml_g_d1_t1": g_hat_d1_t1["models"],
+ "ml_m": m_hat["models"],
+ },
+ }
return psi_elements, preds
- def _score_elements(self, y, d, t,
- g_hat_d0_t0, g_hat_d0_t1,
- g_hat_d1_t0, g_hat_d1_t1,
- m_hat, p_hat, lambda_hat):
-
+ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, m_hat, p_hat, lambda_hat):
# calculate residuals
resid_d0_t0 = y - g_hat_d0_t0
resid_d0_t1 = y - g_hat_d0_t1
@@ -320,11 +360,11 @@ def _score_elements(self, y, d, t,
resid_d1_t1 = y - g_hat_d1_t1
d1t1 = np.multiply(d, t)
- d1t0 = np.multiply(d, 1.0-t)
- d0t1 = np.multiply(1.0-d, t)
- d0t0 = np.multiply(1.0-d, 1.0-t)
+ d1t0 = np.multiply(d, 1.0 - t)
+ d0t1 = np.multiply(1.0 - d, t)
+ d0t0 = np.multiply(1.0 - d, 1.0 - t)
- if self.score == 'observational':
+ if self.score == "observational":
if self.in_sample_normalization:
weight_psi_a = np.divide(d, np.mean(d))
weight_g_d1_t1 = weight_psi_a
@@ -335,7 +375,7 @@ def _score_elements(self, y, d, t,
weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1))
weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.mean(d1t0))
- prop_weighting = np.divide(m_hat, 1.0-m_hat)
+ prop_weighting = np.divide(m_hat, 1.0 - m_hat)
unscaled_d0_t1 = np.multiply(d0t1, prop_weighting)
weight_resid_d0_t1 = -1.0 * np.divide(unscaled_d0_t1, np.mean(unscaled_d0_t1))
@@ -349,15 +389,13 @@ def _score_elements(self, y, d, t,
weight_g_d0_t0 = weight_psi_a
weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat))
- weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0-lambda_hat))
+ weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
- prop_weighting = np.divide(m_hat, 1.0-m_hat)
- weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)),
- prop_weighting)
- weight_resid_d0_t0 = np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0-lambda_hat)),
- prop_weighting)
+ prop_weighting = np.divide(m_hat, 1.0 - m_hat)
+ weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), prop_weighting)
+ weight_resid_d0_t0 = np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0 - lambda_hat)), prop_weighting)
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
if self.in_sample_normalization:
weight_psi_a = np.ones_like(y)
weight_g_d1_t1 = weight_psi_a
@@ -377,22 +415,26 @@ def _score_elements(self, y, d, t,
weight_g_d0_t0 = weight_psi_a
weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat))
- weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0-lambda_hat))
- weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.multiply(1.0-p_hat, lambda_hat))
- weight_resid_d0_t0 = np.divide(d0t0, np.multiply(1.0-p_hat, 1.0-lambda_hat))
+ weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
+ weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat))
+ weight_resid_d0_t0 = np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
# set score elements
psi_a = -1.0 * weight_psi_a
# psi_b
- psi_b_1 = np.multiply(weight_g_d1_t1, g_hat_d1_t1) + \
- np.multiply(weight_g_d1_t0, g_hat_d1_t0) + \
- np.multiply(weight_g_d0_t0, g_hat_d0_t0) + \
- np.multiply(weight_g_d0_t1, g_hat_d0_t1)
- psi_b_2 = np.multiply(weight_resid_d1_t1, resid_d1_t1) + \
- np.multiply(weight_resid_d1_t0, resid_d1_t0) + \
- np.multiply(weight_resid_d0_t0, resid_d0_t0) + \
- np.multiply(weight_resid_d0_t1, resid_d0_t1)
+ psi_b_1 = (
+ np.multiply(weight_g_d1_t1, g_hat_d1_t1)
+ + np.multiply(weight_g_d1_t0, g_hat_d1_t0)
+ + np.multiply(weight_g_d0_t0, g_hat_d0_t0)
+ + np.multiply(weight_g_d0_t1, g_hat_d0_t1)
+ )
+ psi_b_2 = (
+ np.multiply(weight_resid_d1_t1, resid_d1_t1)
+ + np.multiply(weight_resid_d1_t0, resid_d1_t0)
+ + np.multiply(weight_resid_d0_t0, resid_d0_t0)
+ + np.multiply(weight_resid_d0_t1, resid_d0_t1)
+ )
psi_b = psi_b_1 + psi_b_2
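
Collecting the weights above for the observational score without in-sample normalization, this hunk assembles

$$
\psi_b = \frac{D}{\hat p}\,\Delta\hat g(X)
+ \frac{DT}{\hat p \hat\lambda}\bigl(Y - \hat g_{1,1}\bigr)
- \frac{D(1-T)}{\hat p (1-\hat\lambda)}\bigl(Y - \hat g_{1,0}\bigr)
- \frac{\hat m(X)}{1-\hat m(X)} \Bigl[ \frac{(1-D)T}{\hat p \hat\lambda}\bigl(Y - \hat g_{0,1}\bigr)
- \frac{(1-D)(1-T)}{\hat p (1-\hat\lambda)}\bigl(Y - \hat g_{0,0}\bigr) \Bigr],
$$

with $\Delta\hat g(X) = \hat g_{1,1} - \hat g_{1,0} - \hat g_{0,1} + \hat g_{0,0}$, $\psi_a = -D/\hat p$, and $\hat\lambda$ the unconditional probability of the post period; in-sample normalization again swaps the $1/(\hat p \hat\lambda)$-type factors for sample means of the corresponding indicator weights.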
@@ -403,19 +445,23 @@ def _sensitivity_element_est(self, preds):
d = self._dml_data.d
t = self._dml_data.t
- m_hat = preds['predictions']['ml_m']
- g_hat_d0_t0 = preds['predictions']['ml_g_d0_t0']
- g_hat_d0_t1 = preds['predictions']['ml_g_d0_t1']
- g_hat_d1_t0 = preds['predictions']['ml_g_d1_t0']
- g_hat_d1_t1 = preds['predictions']['ml_g_d1_t1']
+ m_hat = preds["predictions"]["ml_m"]
+ g_hat_d0_t0 = preds["predictions"]["ml_g_d0_t0"]
+ g_hat_d0_t1 = preds["predictions"]["ml_g_d0_t1"]
+ g_hat_d1_t0 = preds["predictions"]["ml_g_d1_t0"]
+ g_hat_d1_t1 = preds["predictions"]["ml_g_d1_t1"]
- d0t0 = np.multiply(1.0-d, 1.0-t)
- d0t1 = np.multiply(1.0-d, t)
- d1t0 = np.multiply(d, 1.0-t)
+ d0t0 = np.multiply(1.0 - d, 1.0 - t)
+ d0t1 = np.multiply(1.0 - d, t)
+ d1t0 = np.multiply(d, 1.0 - t)
d1t1 = np.multiply(d, t)
- g_hat = np.multiply(d0t0, g_hat_d0_t0) + np.multiply(d0t1, g_hat_d0_t1) + \
- np.multiply(d1t0, g_hat_d1_t0) + np.multiply(d1t1, g_hat_d1_t1)
+ g_hat = (
+ np.multiply(d0t0, g_hat_d0_t0)
+ + np.multiply(d0t1, g_hat_d0_t1)
+ + np.multiply(d1t0, g_hat_d1_t0)
+ + np.multiply(d1t1, g_hat_d1_t1)
+ )
sigma2_score_element = np.square(y - g_hat)
sigma2 = np.mean(sigma2_score_element)
psi_sigma2 = sigma2_score_element - sigma2
@@ -423,76 +469,86 @@ def _sensitivity_element_est(self, preds):
# calc m(W,alpha) and Riesz representer
p_hat = np.mean(d)
lambda_hat = np.mean(t)
- if self.score == 'observational':
- propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
+ if self.score == "observational":
+ propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat)
if self.in_sample_normalization:
weight_d0t1 = np.multiply(d0t1, propensity_weight_d0)
weight_d0t0 = np.multiply(d0t0, propensity_weight_d0)
mean_weight_d0t1 = np.mean(weight_d0t1)
mean_weight_d0t0 = np.mean(weight_d0t0)
- m_alpha = np.multiply(np.divide(d, p_hat),
- np.divide(1.0, np.mean(d1t1)) +
- np.divide(1.0, np.mean(d1t0)) +
- np.divide(propensity_weight_d0, mean_weight_d0t1) +
- np.divide(propensity_weight_d0, mean_weight_d0t0))
-
- rr = np.divide(d1t1, np.mean(d1t1)) - \
- np.divide(d1t0, np.mean(d1t0)) - \
- np.divide(weight_d0t1, mean_weight_d0t1) + \
- np.divide(weight_d0t0, mean_weight_d0t0)
+ m_alpha = np.multiply(
+ np.divide(d, p_hat),
+ np.divide(1.0, np.mean(d1t1))
+ + np.divide(1.0, np.mean(d1t0))
+ + np.divide(propensity_weight_d0, mean_weight_d0t1)
+ + np.divide(propensity_weight_d0, mean_weight_d0t0),
+ )
+
+ rr = (
+ np.divide(d1t1, np.mean(d1t1))
+ - np.divide(d1t0, np.mean(d1t0))
+ - np.divide(weight_d0t1, mean_weight_d0t1)
+ + np.divide(weight_d0t0, mean_weight_d0t0)
+ )
else:
- m_alpha_1 = np.divide(1.0, lambda_hat) + np.divide(1.0, 1.0-lambda_hat)
+ m_alpha_1 = np.divide(1.0, lambda_hat) + np.divide(1.0, 1.0 - lambda_hat)
m_alpha = np.multiply(np.divide(d, np.square(p_hat)), np.multiply(m_alpha_1, 1.0 + propensity_weight_d0))
- rr_1 = np.divide(t, np.multiply(p_hat, lambda_hat)) + np.divide(1.0-t, np.multiply(p_hat, 1.0-lambda_hat))
- rr_2 = d + np.multiply(1.0-d, propensity_weight_d0)
+ rr_1 = np.divide(t, np.multiply(p_hat, lambda_hat)) + np.divide(1.0 - t, np.multiply(p_hat, 1.0 - lambda_hat))
+ rr_2 = d + np.multiply(1.0 - d, propensity_weight_d0)
rr = np.multiply(rr_1, rr_2)
else:
- assert self.score == 'experimental'
+ assert self.score == "experimental"
if self.in_sample_normalization:
- m_alpha = np.divide(1.0, np.mean(d1t1)) + \
- np.divide(1.0, np.mean(d1t0)) + \
- np.divide(1.0, np.mean(d0t1)) + \
- np.divide(1.0, np.mean(d0t0))
- rr = np.divide(d1t1, np.mean(d1t1)) - \
- np.divide(d1t0, np.mean(d1t0)) - \
- np.divide(d0t1, np.mean(d0t1)) + \
- np.divide(d0t0, np.mean(d0t0))
+ m_alpha = (
+ np.divide(1.0, np.mean(d1t1))
+ + np.divide(1.0, np.mean(d1t0))
+ + np.divide(1.0, np.mean(d0t1))
+ + np.divide(1.0, np.mean(d0t0))
+ )
+ rr = (
+ np.divide(d1t1, np.mean(d1t1))
+ - np.divide(d1t0, np.mean(d1t0))
+ - np.divide(d0t1, np.mean(d0t1))
+ + np.divide(d0t0, np.mean(d0t0))
+ )
else:
- m_alpha = np.divide(1.0, np.multiply(p_hat, lambda_hat)) + \
- np.divide(1.0, np.multiply(p_hat, 1.0-lambda_hat)) + \
- np.divide(1.0, np.multiply(1.0-p_hat, lambda_hat)) + \
- np.divide(1.0, np.multiply(1.0-p_hat, 1.0-lambda_hat))
- rr = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) - \
- np.divide(d1t0, np.multiply(p_hat, 1.0-lambda_hat)) - \
- np.divide(d0t1, np.multiply(1.0-p_hat, lambda_hat)) + \
- np.divide(d0t0, np.multiply(1.0-p_hat, 1.0-lambda_hat))
+ m_alpha = (
+ np.divide(1.0, np.multiply(p_hat, lambda_hat))
+ + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat))
+ + np.divide(1.0, np.multiply(1.0 - p_hat, lambda_hat))
+ + np.divide(1.0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+ )
+ rr = (
+ np.divide(d1t1, np.multiply(p_hat, lambda_hat))
+ - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
+ - np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat))
+ + np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+ )
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2 = np.mean(nu2_score_element)
psi_nu2 = nu2_score_element - nu2
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2,
- 'riesz_rep': rr,
- }
+ element_dict = {
+ "sigma2": sigma2,
+ "nu2": nu2,
+ "psi_sigma2": psi_sigma2,
+ "psi_nu2": psi_nu2,
+ "riesz_rep": rr,
+ }
return element_dict
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
- x, t = check_X_y(x, self._dml_data.t,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+ x, t = check_X_y(x, self._dml_data.t, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_m": None}
# nuisance training sets conditional on d and t
smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t)
@@ -502,56 +558,108 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
train_inds_d1_t0 = [train_index for (train_index, _) in smpls_d1_t0]
train_inds_d1_t1 = [train_index for (train_index, _) in smpls_d1_t1]
- g_d0_t0_tune_res = _dml_tune(y, x, train_inds_d0_t0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- g_d0_t1_tune_res = _dml_tune(y, x, train_inds_d0_t1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- g_d1_t0_tune_res = _dml_tune(y, x, train_inds_d1_t0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- g_d1_t1_tune_res = _dml_tune(y, x, train_inds_d1_t1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g_d0_t0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0_t0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ g_d0_t1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0_t1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ g_d1_t0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1_t0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ g_d1_t1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1_t1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
m_tune_res = list()
- if self.score == 'observational':
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ if self.score == "observational":
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res]
g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res]
g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res]
g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res]
- if self.score == 'observational':
+ if self.score == "observational":
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g_d0_t0': g_d0_t0_best_params,
- 'ml_g_d0_t1': g_d0_t1_best_params,
- 'ml_g_d1_t0': g_d1_t0_best_params,
- 'ml_g_d1_t1': g_d1_t1_best_params,
- 'ml_m': m_best_params}
- tune_res = {'g_d0_t0_tune': g_d0_t0_tune_res,
- 'g_d0_t1_tune': g_d0_t1_tune_res,
- 'g_d1_t0_tune': g_d1_t0_tune_res,
- 'g_d1_t1_tune': g_d1_t1_tune_res,
- 'm_tune': m_tune_res}
+ params = {
+ "ml_g_d0_t0": g_d0_t0_best_params,
+ "ml_g_d0_t1": g_d0_t1_best_params,
+ "ml_g_d1_t0": g_d1_t0_best_params,
+ "ml_g_d1_t1": g_d1_t1_best_params,
+ "ml_m": m_best_params,
+ }
+ tune_res = {
+ "g_d0_t0_tune": g_d0_t0_tune_res,
+ "g_d0_t1_tune": g_d0_t1_tune_res,
+ "g_d1_t0_tune": g_d1_t0_tune_res,
+ "g_d1_t1_tune": g_d1_t1_tune_res,
+ "m_tune": m_tune_res,
+ }
else:
- params = {'ml_g_d0_t0': g_d0_t0_best_params,
- 'ml_g_d0_t1': g_d0_t1_best_params,
- 'ml_g_d1_t0': g_d1_t0_best_params,
- 'ml_g_d1_t1': g_d1_t1_best_params}
- tune_res = {'g_d0_t0_tune': g_d0_t0_tune_res,
- 'g_d0_t1_tune': g_d0_t1_tune_res,
- 'g_d1_t0_tune': g_d1_t0_tune_res,
- 'g_d1_t1_tune': g_d1_t1_tune_res}
-
- res = {'params': params,
- 'tune_res': tune_res}
+ params = {
+ "ml_g_d0_t0": g_d0_t0_best_params,
+ "ml_g_d0_t1": g_d0_t1_best_params,
+ "ml_g_d1_t0": g_d1_t0_best_params,
+ "ml_g_d1_t1": g_d1_t1_best_params,
+ }
+ tune_res = {
+ "g_d0_t0_tune": g_d0_t0_tune_res,
+ "g_d0_t1_tune": g_d0_t1_tune_res,
+ "g_d1_t0_tune": g_d1_t0_tune_res,
+ "g_d1_t1_tune": g_d1_t1_tune_res,
+ }
+
+ res = {"params": params, "tune_res": tune_res}
return res
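
A corresponding hedged sketch for the repeated cross-sections model; the `cross_sectional_data` flag of `make_did_SZ2020` is assumed from the package's documented DGP helpers and is not part of this diff:

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

from doubleml import DoubleMLDIDCS
from doubleml.datasets import make_did_SZ2020

np.random.seed(42)
# cross_sectional_data=True is assumed to yield a DoubleMLData object with a
# binary time variable t, as required by the _check_data logic above.
dml_data = make_did_SZ2020(n_obs=1000, cross_sectional_data=True)

dml_did_cs = DoubleMLDIDCS(
    dml_data,
    ml_g=LinearRegression(),
    ml_m=LogisticRegression(),
    score="observational",
    in_sample_normalization=True,
)
dml_did_cs.fit()
print(dml_did_cs.summary)
```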
diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py
index f7d975e35..f14a52a08 100644
--- a/doubleml/did/tests/_utils_did_cs_manual.py
+++ b/doubleml/did/tests/_utils_did_cs_manual.py
@@ -5,11 +5,24 @@
from ._utils_did_manual import did_dml2
-def fit_did_cs(y, x, d, t,
- learner_g, learner_m, all_smpls, score, in_sample_normalization,
- n_rep=1, g_d0_t0_params=None, g_d0_t1_params=None,
- g_d1_t0_params=None, g_d1_t1_params=None, m_params=None,
- trimming_threshold=1e-2):
+def fit_did_cs(
+ y,
+ x,
+ d,
+ t,
+ learner_g,
+ learner_m,
+ all_smpls,
+ score,
+ in_sample_normalization,
+ n_rep=1,
+ g_d0_t0_params=None,
+ g_d0_t1_params=None,
+ g_d1_t0_params=None,
+ g_d1_t1_params=None,
+ m_params=None,
+ trimming_threshold=1e-2,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -26,14 +39,24 @@ def fit_did_cs(y, x, d, t,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, \
- p_hat_list, lambda_hat_list = fit_nuisance_did_cs(y, x, d, t,
- learner_g, learner_m,
- smpls, score,
- g_d0_t0_params=g_d0_t0_params, g_d0_t1_params=g_d0_t1_params,
- g_d1_t0_params=g_d1_t0_params, g_d1_t1_params=g_d1_t1_params,
- m_params=m_params,
- trimming_threshold=trimming_threshold)
+ g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list = (
+ fit_nuisance_did_cs(
+ y,
+ x,
+ d,
+ t,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g_d0_t0_params=g_d0_t0_params,
+ g_d0_t1_params=g_d0_t1_params,
+ g_d1_t0_params=g_d1_t0_params,
+ g_d1_t1_params=g_d1_t1_params,
+ m_params=m_params,
+ trimming_threshold=trimming_threshold,
+ )
+ )
all_g_hat_d0_t0.append(g_hat_d0_t0_list)
all_g_hat_d0_t1.append(g_hat_d0_t1_list)
@@ -43,16 +66,47 @@ def fit_did_cs(y, x, d, t,
all_p_hat.append(p_hat_list)
all_lambda_hat.append(lambda_hat_list)
- resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1, \
- g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, \
- m_hat, p_hat, lambda_hat = compute_did_cs_residuals(y, g_hat_d0_t0_list, g_hat_d0_t1_list,
- g_hat_d1_t0_list, g_hat_d1_t1_list,
- m_hat_list, p_hat_list,
- lambda_hat_list, smpls)
-
- psi_a, psi_b = did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
- g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1,
- m_hat, p_hat, lambda_hat, d, t, score, in_sample_normalization)
+ (
+ resid_d0_t0,
+ resid_d0_t1,
+ resid_d1_t0,
+ resid_d1_t1,
+ g_hat_d0_t0,
+ g_hat_d0_t1,
+ g_hat_d1_t0,
+ g_hat_d1_t1,
+ m_hat,
+ p_hat,
+ lambda_hat,
+ ) = compute_did_cs_residuals(
+ y,
+ g_hat_d0_t0_list,
+ g_hat_d0_t1_list,
+ g_hat_d1_t0_list,
+ g_hat_d1_t1_list,
+ m_hat_list,
+ p_hat_list,
+ lambda_hat_list,
+ smpls,
+ )
+
+ psi_a, psi_b = did_cs_score_elements(
+ resid_d0_t0,
+ resid_d0_t1,
+ resid_d1_t0,
+ resid_d1_t1,
+ g_hat_d0_t0,
+ g_hat_d0_t1,
+ g_hat_d1_t0,
+ g_hat_d1_t1,
+ m_hat,
+ p_hat,
+ lambda_hat,
+ d,
+ t,
+ score,
+ in_sample_normalization,
+ )
all_psi_a.append(psi_a)
all_psi_b.append(psi_b)
@@ -62,76 +116,88 @@ def fit_did_cs(y, x, d, t,
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_hat_d0_t0': all_g_hat_d0_t0, 'all_g_hat_d0_t1': all_g_hat_d0_t1,
- 'all_g_hat_d1_t0': all_g_hat_d1_t0, 'all_g_hat_d1_t1': all_g_hat_d1_t1,
- 'all_m_hat': all_m_hat,
- 'all_p_hat': all_p_hat, 'all_lambda_hat': all_lambda_hat,
- 'all_psi_a': all_psi_a, 'all_psi_b': all_psi_b}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_hat_d0_t0": all_g_hat_d0_t0,
+ "all_g_hat_d0_t1": all_g_hat_d0_t1,
+ "all_g_hat_d1_t0": all_g_hat_d1_t0,
+ "all_g_hat_d1_t1": all_g_hat_d1_t1,
+ "all_m_hat": all_m_hat,
+ "all_p_hat": all_p_hat,
+ "all_lambda_hat": all_lambda_hat,
+ "all_psi_a": all_psi_a,
+ "all_psi_b": all_psi_b,
+ }
return res
-def fit_nuisance_did_cs(y, x, d, t,
- learner_g, learner_m, smpls, score,
- g_d0_t0_params=None, g_d0_t1_params=None,
- g_d1_t0_params=None, g_d1_t1_params=None,
- m_params=None,
- trimming_threshold=1e-12):
+def fit_nuisance_did_cs(
+ y,
+ x,
+ d,
+ t,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g_d0_t0_params=None,
+ g_d0_t1_params=None,
+ g_d1_t0_params=None,
+ g_d1_t1_params=None,
+ m_params=None,
+ trimming_threshold=1e-12,
+):
ml_g_d0_t0 = clone(learner_g)
ml_g_d0_t1 = clone(learner_g)
ml_g_d1_t0 = clone(learner_g)
ml_g_d1_t1 = clone(learner_g)
train_cond_d0_t0 = np.intersect1d(np.where(d == 0)[0], np.where(t == 0)[0])
- g_hat_d0_t0_list = fit_predict(y, x, ml_g_d0_t0, g_d0_t0_params, smpls,
- train_cond=train_cond_d0_t0)
+ g_hat_d0_t0_list = fit_predict(y, x, ml_g_d0_t0, g_d0_t0_params, smpls, train_cond=train_cond_d0_t0)
train_cond_d0_t1 = np.intersect1d(np.where(d == 0)[0], np.where(t == 1)[0])
- g_hat_d0_t1_list = fit_predict(y, x, ml_g_d0_t1, g_d0_t1_params, smpls,
- train_cond=train_cond_d0_t1)
+ g_hat_d0_t1_list = fit_predict(y, x, ml_g_d0_t1, g_d0_t1_params, smpls, train_cond=train_cond_d0_t1)
train_cond_d1_t0 = np.intersect1d(np.where(d == 1)[0], np.where(t == 0)[0])
- g_hat_d1_t0_list = fit_predict(y, x, ml_g_d1_t0, g_d1_t0_params, smpls,
- train_cond=train_cond_d1_t0)
+ g_hat_d1_t0_list = fit_predict(y, x, ml_g_d1_t0, g_d1_t0_params, smpls, train_cond=train_cond_d1_t0)
train_cond_d1_t1 = np.intersect1d(np.where(d == 1)[0], np.where(t == 1)[0])
- g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls,
- train_cond=train_cond_d1_t1)
- if score == 'observational':
+ g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls, train_cond=train_cond_d1_t1)
+ if score == "observational":
ml_m = clone(learner_m)
- m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
- trimming_threshold=trimming_threshold)
+ m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
else:
- assert score == 'experimental'
+ assert score == "experimental"
m_hat_list = list()
for idx, _ in enumerate(smpls):
            # fill it up, but it's not used further
- m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype='float64'))
+ m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype="float64"))
p_hat_list = []
- for (train_index, _) in smpls:
+ for train_index, _ in smpls:
p_hat_list.append(np.mean(d[train_index]))
lambda_hat_list = []
- for (train_index, _) in smpls:
+ for train_index, _ in smpls:
lambda_hat_list.append(np.mean(t[train_index]))
- return g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, \
- m_hat_list, p_hat_list, lambda_hat_list
+ return g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list
-def compute_did_cs_residuals(y, g_hat_d0_t0_list, g_hat_d0_t1_list,
- g_hat_d1_t0_list, g_hat_d1_t1_list,
- m_hat_list, p_hat_list, lambda_hat_list, smpls):
- g_hat_d0_t0 = np.full_like(y, np.nan, dtype='float64')
- g_hat_d0_t1 = np.full_like(y, np.nan, dtype='float64')
- g_hat_d1_t0 = np.full_like(y, np.nan, dtype='float64')
- g_hat_d1_t1 = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
- p_hat = np.full_like(y, np.nan, dtype='float64')
- lambda_hat = np.full_like(y, np.nan, dtype='float64')
+def compute_did_cs_residuals(
+ y, g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list, smpls
+):
+ g_hat_d0_t0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat_d0_t1 = np.full_like(y, np.nan, dtype="float64")
+ g_hat_d1_t0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat_d1_t1 = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
+ p_hat = np.full_like(y, np.nan, dtype="float64")
+ lambda_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
g_hat_d0_t0[test_index] = g_hat_d0_t0_list[idx]
g_hat_d0_t1[test_index] = g_hat_d0_t1_list[idx]
@@ -145,16 +211,39 @@ def compute_did_cs_residuals(y, g_hat_d0_t0_list, g_hat_d0_t1_list,
resid_d0_t1 = y - g_hat_d0_t1
resid_d1_t0 = y - g_hat_d1_t0
resid_d1_t1 = y - g_hat_d1_t1
- return resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1, \
- g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, \
- m_hat, p_hat, lambda_hat
-
-
-def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
- g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1,
- m_hat, p_hat, lambda_hat, d, t, score, in_sample_normalization):
-
- if score == 'observational':
+ return (
+ resid_d0_t0,
+ resid_d0_t1,
+ resid_d1_t0,
+ resid_d1_t1,
+ g_hat_d0_t0,
+ g_hat_d0_t1,
+ g_hat_d1_t0,
+ g_hat_d1_t1,
+ m_hat,
+ p_hat,
+ lambda_hat,
+ )
+
+
+def did_cs_score_elements(
+ resid_d0_t0,
+ resid_d0_t1,
+ resid_d1_t0,
+ resid_d1_t1,
+ g_hat_d0_t0,
+ g_hat_d0_t1,
+ g_hat_d1_t0,
+ g_hat_d1_t1,
+ m_hat,
+ p_hat,
+ lambda_hat,
+ d,
+ t,
+ score,
+ in_sample_normalization,
+):
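+    # assemble the linear score psi = psi_a * theta + psi_b; the weights below depend on
+    # the score type ("observational" vs. "experimental") and the normalization choice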
+ if score == "observational":
if in_sample_normalization:
weight_psi_a = np.divide(d, np.mean(d))
weight_g_d1_t1 = weight_psi_a
@@ -162,16 +251,14 @@ def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
weight_g_d0_t1 = -1.0 * weight_psi_a
weight_g_d0_t0 = weight_psi_a
- weight_resid_d1_t1 = np.divide(np.multiply(d, t),
- np.mean(np.multiply(d, t)))
- weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0-t),
- np.mean(np.multiply(d, 1.0-t)))
+ weight_resid_d1_t1 = np.divide(np.multiply(d, t), np.mean(np.multiply(d, t)))
+ weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0 - t), np.mean(np.multiply(d, 1.0 - t)))
- prop_weighting = np.divide(m_hat, 1.0-m_hat)
- unscaled_d0_t1 = np.multiply(np.multiply(1.0-d, t), prop_weighting)
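+            # control units are reweighted by the propensity odds m_hat / (1 - m_hat)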
+ prop_weighting = np.divide(m_hat, 1.0 - m_hat)
+ unscaled_d0_t1 = np.multiply(np.multiply(1.0 - d, t), prop_weighting)
weight_resid_d0_t1 = -1.0 * np.divide(unscaled_d0_t1, np.mean(unscaled_d0_t1))
- unscaled_d0_t0 = np.multiply(np.multiply(1.0-d, 1.0-t), prop_weighting)
+ unscaled_d0_t0 = np.multiply(np.multiply(1.0 - d, 1.0 - t), prop_weighting)
weight_resid_d0_t0 = np.divide(unscaled_d0_t0, np.mean(unscaled_d0_t0))
else:
weight_psi_a = np.divide(d, p_hat)
@@ -180,21 +267,19 @@ def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
weight_g_d0_t1 = -1.0 * weight_psi_a
weight_g_d0_t0 = weight_psi_a
- weight_resid_d1_t1 = np.divide(np.multiply(d, t),
- np.multiply(p_hat, lambda_hat))
- weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0-t),
- np.multiply(p_hat, 1.0-lambda_hat))
+ weight_resid_d1_t1 = np.divide(np.multiply(d, t), np.multiply(p_hat, lambda_hat))
+ weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0 - t), np.multiply(p_hat, 1.0 - lambda_hat))
- prop_weighting = np.divide(m_hat, 1.0-m_hat)
- weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(np.multiply(1.0-d, t),
- np.multiply(p_hat, lambda_hat)),
- prop_weighting)
- weight_resid_d0_t0 = np.multiply(np.divide(np.multiply(1.0-d, 1.0-t),
- np.multiply(p_hat, 1.0-lambda_hat)),
- prop_weighting)
+ prop_weighting = np.divide(m_hat, 1.0 - m_hat)
+ weight_resid_d0_t1 = -1.0 * np.multiply(
+ np.divide(np.multiply(1.0 - d, t), np.multiply(p_hat, lambda_hat)), prop_weighting
+ )
+ weight_resid_d0_t0 = np.multiply(
+ np.divide(np.multiply(1.0 - d, 1.0 - t), np.multiply(p_hat, 1.0 - lambda_hat)), prop_weighting
+ )
else:
- assert score == 'experimental'
+ assert score == "experimental"
if in_sample_normalization:
weight_psi_a = np.ones_like(d)
weight_g_d1_t1 = weight_psi_a
@@ -202,14 +287,10 @@ def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
weight_g_d0_t1 = -1.0 * weight_psi_a
weight_g_d0_t0 = weight_psi_a
- weight_resid_d1_t1 = np.divide(np.multiply(d, t),
- np.mean(np.multiply(d, t)))
- weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0-t),
- np.mean(np.multiply(d, 1.0-t)))
- weight_resid_d0_t1 = -1.0 * np.divide(np.multiply(1.0-d, t),
- np.mean(np.multiply(1.0-d, t)))
- weight_resid_d0_t0 = np.divide(np.multiply(1.0-d, 1.0-t),
- np.mean(np.multiply(1.0-d, 1.0-t)))
+ weight_resid_d1_t1 = np.divide(np.multiply(d, t), np.mean(np.multiply(d, t)))
+ weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0 - t), np.mean(np.multiply(d, 1.0 - t)))
+ weight_resid_d0_t1 = -1.0 * np.divide(np.multiply(1.0 - d, t), np.mean(np.multiply(1.0 - d, t)))
+ weight_resid_d0_t0 = np.divide(np.multiply(1.0 - d, 1.0 - t), np.mean(np.multiply(1.0 - d, 1.0 - t)))
else:
weight_psi_a = np.ones_like(d)
weight_g_d1_t1 = weight_psi_a
@@ -217,23 +298,23 @@ def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
weight_g_d0_t1 = -1.0 * weight_psi_a
weight_g_d0_t0 = weight_psi_a
- weight_resid_d1_t1 = np.divide(np.multiply(d, t),
- np.multiply(p_hat, lambda_hat))
- weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0-t),
- np.multiply(p_hat, 1.0-lambda_hat))
- weight_resid_d0_t1 = -1.0 * np.divide(np.multiply(1.0-d, t),
- np.multiply(1.0-p_hat, lambda_hat))
- weight_resid_d0_t0 = np.divide(np.multiply(1.0-d, 1.0-t),
- np.multiply(1.0-p_hat, 1.0-lambda_hat))
-
- psi_b_1 = np.multiply(weight_g_d1_t1, g_hat_d1_t1) + \
- np.multiply(weight_g_d1_t0, g_hat_d1_t0) + \
- np.multiply(weight_g_d0_t0, g_hat_d0_t0) + \
- np.multiply(weight_g_d0_t1, g_hat_d0_t1)
- psi_b_2 = np.multiply(weight_resid_d1_t1, resid_d1_t1) + \
- np.multiply(weight_resid_d1_t0, resid_d1_t0) + \
- np.multiply(weight_resid_d0_t0, resid_d0_t0) + \
- np.multiply(weight_resid_d0_t1, resid_d0_t1)
+ weight_resid_d1_t1 = np.divide(np.multiply(d, t), np.multiply(p_hat, lambda_hat))
+ weight_resid_d1_t0 = -1.0 * np.divide(np.multiply(d, 1.0 - t), np.multiply(p_hat, 1.0 - lambda_hat))
+ weight_resid_d0_t1 = -1.0 * np.divide(np.multiply(1.0 - d, t), np.multiply(1.0 - p_hat, lambda_hat))
+ weight_resid_d0_t0 = np.divide(np.multiply(1.0 - d, 1.0 - t), np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+
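+    # psi_b_1 collects the four outcome-regression terms, psi_b_2 the four weighted residual terms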
+ psi_b_1 = (
+ np.multiply(weight_g_d1_t1, g_hat_d1_t1)
+ + np.multiply(weight_g_d1_t0, g_hat_d1_t0)
+ + np.multiply(weight_g_d0_t0, g_hat_d0_t0)
+ + np.multiply(weight_g_d0_t1, g_hat_d0_t1)
+ )
+ psi_b_2 = (
+ np.multiply(weight_resid_d1_t1, resid_d1_t1)
+ + np.multiply(weight_resid_d1_t0, resid_d1_t0)
+ + np.multiply(weight_resid_d0_t0, resid_d0_t0)
+ + np.multiply(weight_resid_d0_t1, resid_d0_t1)
+ )
psi_a = -1.0 * weight_psi_a
psi_b = psi_b_1 + psi_b_2
@@ -241,37 +322,30 @@ def did_cs_score_elements(resid_d0_t0, resid_d0_t1, resid_d1_t0, resid_d1_t1,
return psi_a, psi_b
-def tune_nuisance_did_cs(y, x, d, t, ml_g, ml_m, smpls, score, n_folds_tune,
- param_grid_g, param_grid_m):
-
+def tune_nuisance_did_cs(y, x, d, t, ml_g, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_m):
smpls_d0_t0 = np.intersect1d(np.where(d == 0)[0], np.where(t == 0)[0])
smpls_d0_t1 = np.intersect1d(np.where(d == 0)[0], np.where(t == 1)[0])
smpls_d1_t0 = np.intersect1d(np.where(d == 1)[0], np.where(t == 0)[0])
smpls_d1_t1 = np.intersect1d(np.where(d == 1)[0], np.where(t == 1)[0])
- g_d0_t0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=smpls_d0_t0)
- g_d0_t1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=smpls_d0_t1)
- g_d1_t0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=smpls_d1_t0)
- g_d1_t1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=smpls_d1_t1)
+ g_d0_t0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=smpls_d0_t0)
+ g_d0_t1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=smpls_d0_t1)
+ g_d1_t0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=smpls_d1_t0)
+ g_d1_t1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=smpls_d1_t1)
g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res]
g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res]
g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res]
g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res]
- if score == 'observational':
+ if score == "observational":
m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
m_best_params = [xx.best_params_ for xx in m_tune_res]
else:
- assert score == 'experimental'
+ assert score == "experimental"
m_best_params = None
- return g_d0_t0_best_params, g_d0_t1_best_params, \
- g_d1_t0_best_params, g_d1_t1_best_params, m_best_params
+ return g_d0_t0_best_params, g_d0_t1_best_params, g_d1_t0_best_params, g_d1_t1_best_params, m_best_params
def fit_sensitivity_elements_did_cs(y, d, t, all_coef, predictions, score, in_sample_normalization, n_rep):
@@ -284,77 +358,93 @@ def fit_sensitivity_elements_did_cs(y, d, t, all_coef, predictions, score, in_sa
psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan)
for i_rep in range(n_rep):
-
- g_hat_d0_t0 = predictions['ml_g_d0_t0'][:, i_rep, 0]
- g_hat_d0_t1 = predictions['ml_g_d0_t1'][:, i_rep, 0]
- g_hat_d1_t0 = predictions['ml_g_d1_t0'][:, i_rep, 0]
- g_hat_d1_t1 = predictions['ml_g_d1_t1'][:, i_rep, 0]
-
- d0t0 = np.multiply(1.0-d, 1.0-t)
- d0t1 = np.multiply(1.0-d, t)
- d1t0 = np.multiply(d, 1.0-t)
+ g_hat_d0_t0 = predictions["ml_g_d0_t0"][:, i_rep, 0]
+ g_hat_d0_t1 = predictions["ml_g_d0_t1"][:, i_rep, 0]
+ g_hat_d1_t0 = predictions["ml_g_d1_t0"][:, i_rep, 0]
+ g_hat_d1_t1 = predictions["ml_g_d1_t1"][:, i_rep, 0]
+
+ d0t0 = np.multiply(1.0 - d, 1.0 - t)
+ d0t1 = np.multiply(1.0 - d, t)
+ d1t0 = np.multiply(d, 1.0 - t)
d1t1 = np.multiply(d, t)
- g_hat = np.multiply(d0t0, g_hat_d0_t0) + np.multiply(d0t1, g_hat_d0_t1) + \
- np.multiply(d1t0, g_hat_d1_t0) + np.multiply(d1t1, g_hat_d1_t1)
+ g_hat = (
+ np.multiply(d0t0, g_hat_d0_t0)
+ + np.multiply(d0t1, g_hat_d0_t1)
+ + np.multiply(d1t0, g_hat_d1_t0)
+ + np.multiply(d1t1, g_hat_d1_t1)
+ )
sigma2_score_element = np.square(y - g_hat)
sigma2[0, i_rep, 0] = np.mean(sigma2_score_element)
psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
p_hat = np.mean(d)
lambda_hat = np.mean(t)
- if score == 'observational':
- m_hat = predictions['ml_m'][:, i_rep, 0]
- propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
+ if score == "observational":
+ m_hat = predictions["ml_m"][:, i_rep, 0]
+ propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat)
if in_sample_normalization:
weight_d0t1 = np.multiply(d0t1, propensity_weight_d0)
weight_d0t0 = np.multiply(d0t0, propensity_weight_d0)
- m_alpha_1 = np.divide(1.0, np.mean(d1t1)) + \
- np.divide(1.0, np.mean(d1t0)) + \
- np.divide(propensity_weight_d0, np.mean(weight_d0t1)) + \
- np.divide(propensity_weight_d0, np.mean(weight_d0t0))
+ m_alpha_1 = (
+ np.divide(1.0, np.mean(d1t1))
+ + np.divide(1.0, np.mean(d1t0))
+ + np.divide(propensity_weight_d0, np.mean(weight_d0t1))
+ + np.divide(propensity_weight_d0, np.mean(weight_d0t0))
+ )
m_alpha = np.multiply(np.divide(d, p_hat), m_alpha_1)
- rr = np.divide(d1t1, np.mean(d1t1)) - \
- np.divide(d1t0, np.mean(d1t0)) - \
- np.divide(weight_d0t1, np.mean(weight_d0t1)) + \
- np.divide(weight_d0t0, np.mean(weight_d0t0))
+ rr = (
+ np.divide(d1t1, np.mean(d1t1))
+ - np.divide(d1t0, np.mean(d1t0))
+ - np.divide(weight_d0t1, np.mean(weight_d0t1))
+ + np.divide(weight_d0t0, np.mean(weight_d0t0))
+ )
else:
- m_alpha_1 = np.divide(1.0, np.multiply(p_hat, lambda_hat)) + \
- np.divide(1.0, np.multiply(p_hat, 1.0-lambda_hat)) + \
- np.divide(propensity_weight_d0, np.multiply(p_hat, lambda_hat)) + \
- np.divide(propensity_weight_d0, np.multiply(p_hat, 1.0-lambda_hat))
+ m_alpha_1 = (
+ np.divide(1.0, np.multiply(p_hat, lambda_hat))
+ + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat))
+ + np.divide(propensity_weight_d0, np.multiply(p_hat, lambda_hat))
+ + np.divide(propensity_weight_d0, np.multiply(p_hat, 1.0 - lambda_hat))
+ )
m_alpha = np.multiply(np.divide(d, p_hat), m_alpha_1)
- rr = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) - \
- np.divide(d1t0, np.multiply(p_hat, 1.0-lambda_hat)) - \
- np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), propensity_weight_d0) + \
- np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0-lambda_hat)), propensity_weight_d0)
+ rr = (
+ np.divide(d1t1, np.multiply(p_hat, lambda_hat))
+ - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
+ - np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), propensity_weight_d0)
+ + np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0 - lambda_hat)), propensity_weight_d0)
+ )
else:
- assert score == 'experimental'
+ assert score == "experimental"
if in_sample_normalization:
- m_alpha = np.divide(1.0, np.mean(d1t1)) + \
- np.divide(1.0, np.mean(d1t0)) + \
- np.divide(1.0, np.mean(d0t1)) + \
- np.divide(1.0, np.mean(d0t0))
- rr = np.divide(d1t1, np.mean(d1t1)) - \
- np.divide(d1t0, np.mean(d1t0)) - \
- np.divide(d0t1, np.mean(d0t1)) + \
- np.divide(d0t0, np.mean(d0t0))
+ m_alpha = (
+ np.divide(1.0, np.mean(d1t1))
+ + np.divide(1.0, np.mean(d1t0))
+ + np.divide(1.0, np.mean(d0t1))
+ + np.divide(1.0, np.mean(d0t0))
+ )
+ rr = (
+ np.divide(d1t1, np.mean(d1t1))
+ - np.divide(d1t0, np.mean(d1t0))
+ - np.divide(d0t1, np.mean(d0t1))
+ + np.divide(d0t0, np.mean(d0t0))
+ )
else:
- m_alpha = np.divide(1.0, np.multiply(p_hat, lambda_hat)) + \
- np.divide(1.0, np.multiply(p_hat, 1.0-lambda_hat)) + \
- np.divide(1.0, np.multiply(1.0-p_hat, lambda_hat)) + \
- np.divide(1.0, np.multiply(1.0-p_hat, 1.0-lambda_hat))
- rr = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) - \
- np.divide(d1t0, np.multiply(p_hat, 1.0-lambda_hat)) - \
- np.divide(d0t1, np.multiply(1.0-p_hat, lambda_hat)) + \
- np.divide(d0t0, np.multiply(1.0-p_hat, 1.0-lambda_hat))
+ m_alpha = (
+ np.divide(1.0, np.multiply(p_hat, lambda_hat))
+ + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat))
+ + np.divide(1.0, np.multiply(1.0 - p_hat, lambda_hat))
+ + np.divide(1.0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+ )
+ rr = (
+ np.divide(d1t1, np.multiply(p_hat, lambda_hat))
+ - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
+ - np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat))
+ + np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+ )
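+        # nu2 score element based on the Riesz representer rr: 2 * m_alpha - rr^2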
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2[0, i_rep, 0] = np.mean(nu2_score_element)
psi_nu2[:, i_rep, 0] = nu2_score_element - nu2[0, i_rep, 0]
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2}
+ element_dict = {"sigma2": sigma2, "nu2": nu2, "psi_sigma2": psi_sigma2, "psi_nu2": psi_nu2}
return element_dict
diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py
index 0fc98a6a2..e48c90423 100644
--- a/doubleml/did/tests/_utils_did_manual.py
+++ b/doubleml/did/tests/_utils_did_manual.py
@@ -1,14 +1,25 @@
import numpy as np
from sklearn.base import clone
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_did(y, x, d,
- learner_g, learner_m, all_smpls, score, in_sample_normalization,
- n_rep=1, g0_params=None, g1_params=None, m_params=None,
- trimming_threshold=1e-2):
+def fit_did(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ all_smpls,
+ score,
+ in_sample_normalization,
+ n_rep=1,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ trimming_threshold=1e-2,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -22,12 +33,19 @@ def fit_did(y, x, d,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat0_list, g_hat1_list, m_hat_list, \
- p_hat_list = fit_nuisance_did(y, x, d,
- learner_g, learner_m, smpls,
- score,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params,
- trimming_threshold=trimming_threshold)
+ g_hat0_list, g_hat1_list, m_hat_list, p_hat_list = fit_nuisance_did(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ trimming_threshold=trimming_threshold,
+ )
all_g_hat0.append(g_hat0_list)
all_g_hat1.append(g_hat1_list)
@@ -35,10 +53,10 @@ def fit_did(y, x, d,
all_p_hat.append(p_hat_list)
resid_d0, g_hat0, g_hat1, m_hat, p_hat = compute_did_residuals(
- y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls)
+ y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls
+ )
- psi_a, psi_b = did_score_elements(g_hat0, g_hat1, m_hat, p_hat,
- resid_d0, d, score, in_sample_normalization)
+ psi_a, psi_b = did_score_elements(g_hat0, g_hat1, m_hat, p_hat, resid_d0, d, score, in_sample_normalization)
all_psi_a.append(psi_a)
all_psi_b.append(psi_b)
@@ -48,51 +66,56 @@ def fit_did(y, x, d,
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_hat0': all_g_hat0, 'all_g_hat1': all_g_hat1, 'all_m_hat': all_m_hat, 'all_p_hat': all_p_hat,
- 'all_psi_a': all_psi_a, 'all_psi_b': all_psi_b}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_hat0": all_g_hat0,
+ "all_g_hat1": all_g_hat1,
+ "all_m_hat": all_m_hat,
+ "all_p_hat": all_p_hat,
+ "all_psi_a": all_psi_a,
+ "all_psi_b": all_psi_b,
+ }
return res
-def fit_nuisance_did(y, x, d, learner_g, learner_m, smpls, score,
- g0_params=None, g1_params=None, m_params=None,
- trimming_threshold=1e-12):
+def fit_nuisance_did(
+ y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12
+):
ml_g0 = clone(learner_g)
ml_g1 = clone(learner_g)
train_cond0 = np.where(d == 0)[0]
- g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
train_cond1 = np.where(d == 1)[0]
- g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
- if score == 'experimental':
+ g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
+ if score == "experimental":
m_hat_list = list()
for idx, _ in enumerate(smpls):
             # fill it up, but it's not used further
- m_hat_list.append(np.zeros_like(g_hat0_list[idx], dtype='float64'))
+ m_hat_list.append(np.zeros_like(g_hat0_list[idx], dtype="float64"))
else:
- assert score == 'observational'
+ assert score == "observational"
ml_m = clone(learner_m)
- m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
- trimming_threshold=trimming_threshold)
+ m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
p_hat_list = []
- for (train_index, _) in smpls:
+ for train_index, _ in smpls:
p_hat_list.append(np.mean(d[train_index]))
return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list
def compute_did_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls):
- resid_d0 = np.full_like(y, np.nan, dtype='float64')
- g_hat0 = np.full_like(y, np.nan, dtype='float64')
- g_hat1 = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
- p_hat = np.full_like(y, np.nan, dtype='float64')
+ resid_d0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat1 = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
+ p_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
resid_d0[test_index] = y[test_index] - g_hat0_list[idx]
g_hat0[test_index] = g_hat0_list[idx]
@@ -105,54 +128,52 @@ def compute_did_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, s
def did_dml2(psi_a, psi_b):
n_obs = len(psi_a)
- theta_hat = - np.mean(psi_b) / np.mean(psi_a)
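+    # DML2 aggregation: solve the score equation E[psi_a] * theta + E[psi_b] = 0 for theta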
+ theta_hat = -np.mean(psi_b) / np.mean(psi_a)
se = np.sqrt(var_did(theta_hat, psi_a, psi_b, n_obs))
return theta_hat, se
def did_score_elements(g_hat0, g_hat1, m_hat, p_hat, resid_d0, d, score, in_sample_normalization):
-
- if score == 'observational':
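+    # the observational score corrects with the estimated propensity m_hat;
+    # the experimental score relies on randomized treatment assignment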
+ if score == "observational":
if in_sample_normalization:
weight_psi_a = np.divide(d, np.mean(d))
- propensity_weight = np.multiply(1.0-d, np.divide(m_hat, 1.0-m_hat))
+ propensity_weight = np.multiply(1.0 - d, np.divide(m_hat, 1.0 - m_hat))
weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(propensity_weight, np.mean(propensity_weight))
else:
weight_psi_a = np.divide(d, p_hat)
- weight_resid_d0 = np.divide(d-m_hat, np.multiply(p_hat, 1.0-m_hat))
+ weight_resid_d0 = np.divide(d - m_hat, np.multiply(p_hat, 1.0 - m_hat))
psi_b_1 = np.zeros_like(d)
else:
- assert score == 'experimental'
+ assert score == "experimental"
if in_sample_normalization:
weight_psi_a = np.ones_like(d)
weight_g0 = np.divide(d, np.mean(d)) - 1.0
weight_g1 = 1.0 - np.divide(d, np.mean(d))
- weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(1.0-d, np.mean(1.0-d))
+ weight_resid_d0 = np.divide(d, np.mean(d)) - np.divide(1.0 - d, np.mean(1.0 - d))
else:
weight_psi_a = np.ones_like(d)
weight_g0 = np.divide(d, p_hat) - 1.0
weight_g1 = 1.0 - np.divide(d, p_hat)
- weight_resid_d0 = np.divide(d-p_hat, np.multiply(p_hat, 1.0-p_hat))
+ weight_resid_d0 = np.divide(d - p_hat, np.multiply(p_hat, 1.0 - p_hat))
- psi_b_1 = np.multiply(weight_g0, g_hat0) + np.multiply(weight_g1, g_hat1)
+ psi_b_1 = np.multiply(weight_g0, g_hat0) + np.multiply(weight_g1, g_hat1)
psi_a = -1.0 * weight_psi_a
- psi_b = psi_b_1 + np.multiply(weight_resid_d0, resid_d0)
+ psi_b = psi_b_1 + np.multiply(weight_resid_d0, resid_d0)
return psi_a, psi_b
def var_did(theta, psi_a, psi_b, n_obs):
J = np.mean(psi_a)
- var = 1/n_obs * np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2)
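+    # sandwich-type variance: E[(psi_a * theta + psi_b)^2] / (n_obs * J^2) with J = E[psi_a]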
+ var = 1 / n_obs * np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2)
return var
-def boot_did(y, thetas, ses, all_psi_a, all_psi_b,
- all_smpls, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True):
+def boot_did(y, thetas, ses, all_psi_a, all_psi_b, all_smpls, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True):
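+    # multiplier bootstrap: draw weights per repetition and recompute the studentized statistic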
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -163,8 +184,8 @@ def boot_did(y, thetas, ses, all_psi_a, all_psi_b,
n_obs = len(test_index)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_did_single_split(
- thetas[i_rep], all_psi_a[i_rep], all_psi_b[i_rep], smpls,
- ses[i_rep], weights, n_rep_boot, apply_cross_fitting)
+ thetas[i_rep], all_psi_a[i_rep], all_psi_b[i_rep], smpls, ses[i_rep], weights, n_rep_boot, apply_cross_fitting
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -172,9 +193,7 @@ def boot_did(y, thetas, ses, all_psi_a, all_psi_b,
return boot_t_stat
-def boot_did_single_split(theta, psi_a, psi_b,
- smpls, se, weights, n_rep_boot, apply_cross_fitting):
-
+def boot_did_single_split(theta, psi_a, psi_b, smpls, se, weights, n_rep_boot, apply_cross_fitting):
if apply_cross_fitting:
J = np.mean(psi_a)
else:
@@ -187,22 +206,19 @@ def boot_did_single_split(theta, psi_a, psi_b,
return boot_t_stat
-def tune_nuisance_did(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune,
- param_grid_g, param_grid_m):
+def tune_nuisance_did(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_m):
train_cond0 = np.where(d == 0)[0]
- g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond0)
+ g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
g0_best_params = [xx.best_params_ for xx in g0_tune_res]
train_cond1 = np.where(d == 1)[0]
- g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond1)
+ g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond1)
g1_best_params = [xx.best_params_ for xx in g1_tune_res]
- if score == 'experimental':
+ if score == "experimental":
m_best_params = None
else:
- assert score == 'observational'
+ assert score == "observational"
m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
m_best_params = [xx.best_params_ for xx in m_tune_res]
@@ -219,39 +235,36 @@ def fit_sensitivity_elements_did(y, d, all_coef, predictions, score, in_sample_n
psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan)
for i_rep in range(n_rep):
+ g_hat0 = predictions["ml_g0"][:, i_rep, 0]
+ g_hat1 = predictions["ml_g1"][:, i_rep, 0]
- g_hat0 = predictions['ml_g0'][:, i_rep, 0]
- g_hat1 = predictions['ml_g1'][:, i_rep, 0]
-
- sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0))
+ sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0 - d, g_hat0))
sigma2[0, i_rep, 0] = np.mean(sigma2_score_element)
psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
- if score == 'observational':
- m_hat = predictions['ml_m'][:, i_rep, 0]
- propensity_weight_d0 = np.divide(m_hat, 1.0-m_hat)
+ if score == "observational":
+ m_hat = predictions["ml_m"][:, i_rep, 0]
+ propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat)
if in_sample_normalization:
m_alpha_1 = np.divide(d, np.mean(d))
- m_alpha_2 = np.divide(1, np.mean(d)) + \
- np.divide(propensity_weight_d0, np.mean(np.multiply(1.0-d, propensity_weight_d0)))
+ m_alpha_2 = np.divide(1, np.mean(d)) + np.divide(
+ propensity_weight_d0, np.mean(np.multiply(1.0 - d, propensity_weight_d0))
+ )
m_alpha = np.multiply(m_alpha_1, m_alpha_2)
- rr_1 = np.multiply(1.0-d, propensity_weight_d0)
+ rr_1 = np.multiply(1.0 - d, propensity_weight_d0)
rr = np.divide(d, np.mean(d)) - np.divide(rr_1, np.mean(rr_1))
else:
m_alpha_1 = np.divide(d, np.square(np.mean(d)))
- m_alpha = np.multiply(m_alpha_1, 1.0+propensity_weight_d0)
- rr = np.divide(d, np.mean(d)) - np.multiply(np.divide(1.0-d, np.mean(d)), propensity_weight_d0)
+ m_alpha = np.multiply(m_alpha_1, 1.0 + propensity_weight_d0)
+ rr = np.divide(d, np.mean(d)) - np.multiply(np.divide(1.0 - d, np.mean(d)), propensity_weight_d0)
else:
- assert score == 'experimental'
- m_alpha = np.divide(1.0, np.mean(d)) + np.divide(1.0, 1.0-np.mean(d))
- rr = np.divide(d, np.mean(d)) - np.divide(1.0-d, 1.0-np.mean(d))
+ assert score == "experimental"
+ m_alpha = np.divide(1.0, np.mean(d)) + np.divide(1.0, 1.0 - np.mean(d))
+ rr = np.divide(d, np.mean(d)) - np.divide(1.0 - d, 1.0 - np.mean(d))
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2[0, i_rep, 0] = np.mean(nu2_score_element)
psi_nu2[:, i_rep, 0] = nu2_score_element - nu2[0, i_rep, 0]
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2}
+ element_dict = {"sigma2": sigma2, "nu2": nu2, "psi_sigma2": psi_sigma2, "psi_nu2": psi_nu2}
return element_dict
diff --git a/doubleml/did/tests/conftest.py b/doubleml/did/tests/conftest.py
index b2cf2c99d..90e8394c2 100644
--- a/doubleml/did/tests/conftest.py
+++ b/doubleml/did/tests/conftest.py
@@ -4,10 +4,7 @@
from doubleml.datasets import make_did_SZ2020
-@pytest.fixture(scope='session',
- params=[(500, 1),
- (1000, 1),
- (1000, 2)])
+@pytest.fixture(scope="session", params=[(500, 1), (1000, 1), (1000, 2)])
def generate_data_did(request):
params = request.param
np.random.seed(1111)
@@ -16,15 +13,12 @@ def generate_data_did(request):
dpg = params[1]
# generating data
- data = make_did_SZ2020(n, dgp_type=dpg, return_type='array')
+ data = make_did_SZ2020(n, dgp_type=dpg, return_type="array")
return data
-@pytest.fixture(scope='session',
- params=[(500, 1),
- (1000, 1),
- (1000, 2)])
+@pytest.fixture(scope="session", params=[(500, 1), (1000, 1), (1000, 2)])
def generate_data_did_cs(request):
params = request.param
np.random.seed(1111)
@@ -33,6 +27,6 @@ def generate_data_did_cs(request):
dpg = params[1]
# generating data
- data = make_did_SZ2020(n, dgp_type=dpg, cross_sectional_data=True, return_type='array')
+ data = make_did_SZ2020(n, dgp_type=dpg, cross_sectional_data=True, return_type="array")
return data
diff --git a/doubleml/did/tests/test_did.py b/doubleml/did/tests/test_did.py
index 88e8539ea..a6160b39e 100644
--- a/doubleml/did/tests/test_did.py
+++ b/doubleml/did/tests/test_did.py
@@ -1,48 +1,49 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_did_manual import fit_did, boot_did, fit_sensitivity_elements_did
-
-
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+from ._utils_did_manual import boot_did, fit_did, fit_sensitivity_elements_did
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['observational', 'experimental'])
+@pytest.fixture(scope="module", params=["observational", "experimental"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def in_sample_normalization(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.1])
+@pytest.fixture(scope="module", params=[0.1])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -59,96 +60,116 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
np.random.seed(3141)
- dml_did_obj = dml.DoubleMLDID(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- in_sample_normalization=in_sample_normalization,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_did_obj = dml.DoubleMLDID(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_did_obj.set_sample_splitting(all_smpls=all_smpls)
dml_did_obj.fit()
np.random.seed(3141)
- res_manual = fit_did(y, x, d,
- clone(learner[0]), clone(learner[1]),
- all_smpls, score, in_sample_normalization,
- trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_did_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'se': dml_did_obj.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_did(
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ in_sample_normalization,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_did_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "se": dml_did_obj.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
- res_manual['all_psi_a'], res_manual['all_psi_b'],
- all_smpls, bootstrap, n_rep_boot)
+ boot_t_stat = boot_did(
+ y,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_psi_a"],
+ res_manual["all_psi_b"],
+ all_smpls,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_did_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_did_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_did(y, d,
- all_coef=dml_did_obj.all_coef,
- predictions=dml_did_obj.predictions,
- score=score,
- in_sample_normalization=in_sample_normalization,
- n_rep=1)
+ res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_did(
+ y,
+ d,
+ all_coef=dml_did_obj.all_coef,
+ predictions=dml_did_obj.predictions,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ n_rep=1,
+ )
     # check if the sensitivity score with rho=0 gives the same asymptotic standard deviation
dml_did_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_did_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_did_obj.sensitivity_params["se"]
return res_dict
@pytest.mark.ci
def test_dml_did_coef(dml_did_fixture):
- assert math.isclose(dml_did_fixture['coef'][0],
- dml_did_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_fixture["coef"][0], dml_did_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_se(dml_did_fixture):
- assert math.isclose(dml_did_fixture['se'][0],
- dml_did_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_fixture["se"][0], dml_did_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_boot(dml_did_fixture):
- for bootstrap in dml_did_fixture['boot_methods']:
- assert np.allclose(dml_did_fixture['boot_t_stat' + bootstrap],
- dml_did_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_did_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_did_fixture["boot_t_stat" + bootstrap],
+ dml_did_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_did_sensitivity(dml_did_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_did_fixture['sensitivity_elements'][sensitivity_element],
- dml_did_fixture['sensitivity_elements_manual'][sensitivity_element],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_did_fixture["sensitivity_elements"][sensitivity_element],
+ dml_did_fixture["sensitivity_elements_manual"][sensitivity_element],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_did_sensitivity_rho0(dml_did_fixture):
- assert np.allclose(dml_did_fixture['se'],
- dml_did_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_did_fixture['se'],
- dml_did_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_did_fixture["se"], dml_did_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_did_fixture["se"], dml_did_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
@@ -164,25 +185,21 @@ def test_dml_did_experimental(generate_data_did, in_sample_normalization, learne
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
np.random.seed(3141)
- dml_did_obj_without_ml_m = dml.DoubleMLDID(obj_dml_data,
- ml_g,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml_did_obj_without_ml_m = dml.DoubleMLDID(
+ obj_dml_data, ml_g, score="experimental", in_sample_normalization=in_sample_normalization
+ )
dml_did_obj_without_ml_m.fit()
np.random.seed(3141)
- dml_did_obj_with_ml_m = dml.DoubleMLDID(obj_dml_data,
- ml_g, ml_m,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml_did_obj_with_ml_m = dml.DoubleMLDID(
+ obj_dml_data, ml_g, ml_m, score="experimental", in_sample_normalization=in_sample_normalization
+ )
dml_did_obj_with_ml_m.fit()
- assert math.isclose(dml_did_obj_with_ml_m.coef[0],
- dml_did_obj_without_ml_m.coef[0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_obj_with_ml_m.coef[0], dml_did_obj_without_ml_m.coef[0], rel_tol=1e-9, abs_tol=1e-4)
- msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
- 'A learner ml_m is not required for estimation.')
+ msg = (
+ 'A learner ml_m has been provided for score = "experimental" but will be ignored. '
+ "A learner ml_m is not required for estimation."
+ )
with pytest.warns(UserWarning, match=msg):
- dml.DoubleMLDID(obj_dml_data, ml_g, ml_m,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml.DoubleMLDID(obj_dml_data, ml_g, ml_m, score="experimental", in_sample_normalization=in_sample_normalization)
diff --git a/doubleml/did/tests/test_did_cs.py b/doubleml/did/tests/test_did_cs.py
index df3460b48..ae6335883 100644
--- a/doubleml/did/tests/test_did_cs.py
+++ b/doubleml/did/tests/test_did_cs.py
@@ -1,11 +1,10 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
@@ -14,36 +13,38 @@
from ._utils_did_manual import boot_did
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['observational', 'experimental'])
+@pytest.fixture(scope="module", params=["observational", "experimental"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def in_sample_normalization(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.1])
+@pytest.fixture(scope="module", params=[0.1])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -57,99 +58,121 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
np.random.seed(3141)
n_obs = len(y)
- all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)
+ all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d + 2 * t)
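+    # d + 2 * t encodes the four (d, t) cells, so the sample splits are stratified by cell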
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
np.random.seed(3141)
- dml_did_cs_obj = dml.DoubleMLDIDCS(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- in_sample_normalization=in_sample_normalization,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_did_cs_obj = dml.DoubleMLDIDCS(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_did_cs_obj.set_sample_splitting(all_smpls=all_smpls)
dml_did_cs_obj.fit()
np.random.seed(3141)
- res_manual = fit_did_cs(y, x, d, t,
- clone(learner[0]), clone(learner[1]),
- all_smpls, score, in_sample_normalization,
- trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_did_cs_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'se': dml_did_cs_obj.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_did_cs(
+ y,
+ x,
+ d,
+ t,
+ clone(learner[0]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ in_sample_normalization,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_did_cs_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "se": dml_did_cs_obj.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
- res_manual['all_psi_a'], res_manual['all_psi_b'],
- all_smpls, bootstrap, n_rep_boot)
+ boot_t_stat = boot_did(
+ y,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_psi_a"],
+ res_manual["all_psi_b"],
+ all_smpls,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_did_cs_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_did_cs_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_did_cs_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_did_cs_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_did_cs(y, d, t,
- all_coef=dml_did_cs_obj.all_coef,
- predictions=dml_did_cs_obj.predictions,
- score=score,
- in_sample_normalization=in_sample_normalization,
- n_rep=1)
+ res_dict["sensitivity_elements"] = dml_did_cs_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_did_cs(
+ y,
+ d,
+ t,
+ all_coef=dml_did_cs_obj.all_coef,
+ predictions=dml_did_cs_obj.predictions,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ n_rep=1,
+ )
     # check if the sensitivity score with rho=0 gives the same asymptotic standard deviation
dml_did_cs_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_did_cs_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_did_cs_obj.sensitivity_params["se"]
return res_dict
@pytest.mark.ci
def test_dml_did_cs_coef(dml_did_cs_fixture):
- assert math.isclose(dml_did_cs_fixture['coef'][0],
- dml_did_cs_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_cs_fixture["coef"][0], dml_did_cs_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_cs_se(dml_did_cs_fixture):
- assert math.isclose(dml_did_cs_fixture['se'][0],
- dml_did_cs_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_cs_fixture["se"][0], dml_did_cs_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_cs_boot(dml_did_cs_fixture):
- for bootstrap in dml_did_cs_fixture['boot_methods']:
- assert np.allclose(dml_did_cs_fixture['boot_t_stat' + bootstrap],
- dml_did_cs_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_did_cs_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_did_cs_fixture["boot_t_stat" + bootstrap],
+ dml_did_cs_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_did_cs_sensitivity(dml_did_cs_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_did_cs_fixture['sensitivity_elements'][sensitivity_element],
- dml_did_cs_fixture['sensitivity_elements_manual'][sensitivity_element],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_did_cs_fixture["sensitivity_elements"][sensitivity_element],
+ dml_did_cs_fixture["sensitivity_elements_manual"][sensitivity_element],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_did_cs_sensitivity_rho0(dml_did_cs_fixture):
- assert np.allclose(dml_did_cs_fixture['se'],
- dml_did_cs_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_did_cs_fixture['se'],
- dml_did_cs_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_did_cs_fixture["se"], dml_did_cs_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_did_cs_fixture["se"], dml_did_cs_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
@@ -165,25 +188,21 @@ def test_dml_did_cs_experimental(generate_data_did_cs, in_sample_normalization,
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
np.random.seed(3141)
- dml_did_obj_without_ml_m = dml.DoubleMLDIDCS(obj_dml_data,
- ml_g,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml_did_obj_without_ml_m = dml.DoubleMLDIDCS(
+ obj_dml_data, ml_g, score="experimental", in_sample_normalization=in_sample_normalization
+ )
dml_did_obj_without_ml_m.fit()
np.random.seed(3141)
- dml_did_obj_with_ml_m = dml.DoubleMLDIDCS(obj_dml_data,
- ml_g, ml_m,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml_did_obj_with_ml_m = dml.DoubleMLDIDCS(
+ obj_dml_data, ml_g, ml_m, score="experimental", in_sample_normalization=in_sample_normalization
+ )
dml_did_obj_with_ml_m.fit()
- assert math.isclose(dml_did_obj_with_ml_m.coef[0],
- dml_did_obj_without_ml_m.coef[0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_obj_with_ml_m.coef[0], dml_did_obj_without_ml_m.coef[0], rel_tol=1e-9, abs_tol=1e-4)
- msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '
- 'A learner ml_m is not required for estimation.')
+ msg = (
+ 'A learner ml_m has been provided for score = "experimental" but will be ignored. '
+ "A learner ml_m is not required for estimation."
+ )
with pytest.warns(UserWarning, match=msg):
- dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m,
- score='experimental',
- in_sample_normalization=in_sample_normalization)
+ dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m, score="experimental", in_sample_normalization=in_sample_normalization)
diff --git a/doubleml/did/tests/test_did_cs_external_predictions.py b/doubleml/did/tests/test_did_cs_external_predictions.py
index 732aaa54d..f4a479971 100644
--- a/doubleml/did/tests/test_did_cs_external_predictions.py
+++ b/doubleml/did/tests/test_did_cs_external_predictions.py
@@ -1,10 +1,13 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression, LogisticRegression
+
from doubleml import DoubleMLDIDCS
from doubleml.datasets import make_did_SZ2020
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
+
from ...tests._utils import draw_smpls
@@ -23,13 +26,7 @@ def doubleml_didcs_fixture(did_score, n_rep):
ext_predictions = {"d": {}}
dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData")
all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d)
- kwargs = {
- "obj_dml_data": dml_data,
- "score": did_score,
- "n_rep": n_rep,
- "n_folds": 5,
- "draw_sample_splitting": False
- }
+ kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "n_folds": 5, "draw_sample_splitting": False}
dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
dml_did_cs.set_sample_splitting(all_smpls)
np.random.seed(3141)
diff --git a/doubleml/did/tests/test_did_cs_tune.py b/doubleml/did/tests/test_did_cs_tune.py
index bc8abec84..5ec33e822 100644
--- a/doubleml/did/tests/test_did_cs_tune.py
+++ b/doubleml/did/tests/test_did_cs_tune.py
@@ -1,65 +1,58 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_did_manual import boot_did
from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs
+from ._utils_did_manual import boot_did
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression()])
+@pytest.fixture(scope="module", params=[LogisticRegression()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=['observational', 'experimental'])
+@pytest.fixture(scope="module", params=["observational", "experimental"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def in_sample_normalization(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-4, 2, 10)}
+ par_grid = {"C": np.logspace(-4, 2, 10)}
return par_grid
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sample_normalization, tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -71,24 +64,25 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
ml_m = clone(learner_m)
n_obs = len(y)
- all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)
+ all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d + 2 * t)
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
- dml_did_cs_obj = dml.DoubleMLDIDCS(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- in_sample_normalization=in_sample_normalization,
- draw_sample_splitting=False)
+ dml_did_cs_obj = dml.DoubleMLDIDCS(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_did_cs_obj.set_sample_splitting(all_smpls=all_smpls)
np.random.seed(3141)
# tune hyperparameters
- tune_res = dml_did_cs_obj.tune(par_grid, tune_on_folds=tune_on_folds,
- n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_did_cs_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLDIDCS)
dml_did_cs_obj.fit()
@@ -97,71 +91,86 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
smpls = all_smpls[0]
if tune_on_folds:
- g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, \
- m_params = tune_nuisance_did_cs(y, x, d, t,
- clone(learner_g), clone(learner_m),
- smpls, score, n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+ y, x, d, t, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, \
- m_params = tune_nuisance_did_cs(y, x, d, t,
- clone(learner_g), clone(learner_m),
- xx, score, n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+ y, x, d, t, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
g_d0_t0_params = g_d0_t0_params * n_folds
g_d0_t1_params = g_d0_t1_params * n_folds
g_d1_t0_params = g_d1_t0_params * n_folds
g_d1_t1_params = g_d1_t1_params * n_folds
- if score == 'observational':
+ if score == "observational":
m_params = m_params * n_folds
else:
- assert score == 'experimental'
+ assert score == "experimental"
m_params = None
- res_manual = fit_did_cs(y, x, d, t, clone(learner_g), clone(learner_m),
- all_smpls, score, in_sample_normalization,
- g_d0_t0_params=g_d0_t0_params, g_d0_t1_params=g_d0_t1_params,
- g_d1_t0_params=g_d1_t0_params, g_d1_t1_params=g_d1_t1_params,
- m_params=m_params)
-
- res_dict = {'coef': dml_did_cs_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'se': dml_did_cs_obj.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_did_cs(
+ y,
+ x,
+ d,
+ t,
+ clone(learner_g),
+ clone(learner_m),
+ all_smpls,
+ score,
+ in_sample_normalization,
+ g_d0_t0_params=g_d0_t0_params,
+ g_d0_t1_params=g_d0_t1_params,
+ g_d1_t0_params=g_d1_t0_params,
+ g_d1_t1_params=g_d1_t1_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_did_cs_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "se": dml_did_cs_obj.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
- res_manual['all_psi_a'], res_manual['all_psi_b'],
- all_smpls, bootstrap, n_rep_boot)
+ boot_t_stat = boot_did(
+ y,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_psi_a"],
+ res_manual["all_psi_b"],
+ all_smpls,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_did_cs_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_did_cs_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_did_cs_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_did_cs_coef(dml_did_cs_fixture):
- assert math.isclose(dml_did_cs_fixture['coef'][0],
- dml_did_cs_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_cs_fixture["coef"][0], dml_did_cs_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_cs_se(dml_did_cs_fixture):
- assert math.isclose(dml_did_cs_fixture['se'][0],
- dml_did_cs_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_cs_fixture["se"][0], dml_did_cs_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_cs_boot(dml_did_cs_fixture):
- for bootstrap in dml_did_cs_fixture['boot_methods']:
- assert np.allclose(dml_did_cs_fixture['boot_t_stat' + bootstrap],
- dml_did_cs_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_did_cs_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_did_cs_fixture["boot_t_stat" + bootstrap],
+ dml_did_cs_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/did/tests/test_did_external_predictions.py b/doubleml/did/tests/test_did_external_predictions.py
index 676302e96..9027e7dc7 100644
--- a/doubleml/did/tests/test_did_external_predictions.py
+++ b/doubleml/did/tests/test_did_external_predictions.py
@@ -1,10 +1,13 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression, LogisticRegression
+
from doubleml import DoubleMLDID
from doubleml.datasets import make_did_SZ2020
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
+
from ...tests._utils import draw_smpls
@@ -23,12 +26,7 @@ def doubleml_did_fixture(did_score, n_rep):
ext_predictions = {"d": {}}
dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData")
all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d)
- kwargs = {
- "obj_dml_data": dml_data,
- "score": did_score,
- "n_rep": n_rep,
- "draw_sample_splitting": False
- }
+ kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "draw_sample_splitting": False}
dml_did = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
dml_did.set_sample_splitting(all_smpls)
np.random.seed(3141)
diff --git a/doubleml/did/tests/test_did_tune.py b/doubleml/did/tests/test_did_tune.py
index da85f6934..c5a381f3c 100644
--- a/doubleml/did/tests/test_did_tune.py
+++ b/doubleml/did/tests/test_did_tune.py
@@ -1,65 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_did_manual import fit_did, boot_did, tune_nuisance_did
+from ._utils_did_manual import boot_did, fit_did, tune_nuisance_did
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression()])
+@pytest.fixture(scope="module", params=[LogisticRegression()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=['observational', 'experimental'])
+@pytest.fixture(scope="module", params=["observational", "experimental"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def in_sample_normalization(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-4, 2, 10)}
+ par_grid = {"C": np.logspace(-4, 2, 10)}
return par_grid
-@pytest.fixture(scope='module')
-def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_normalization,
- tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+@pytest.fixture(scope="module")
+def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_normalization, tune_on_folds):
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -74,18 +66,20 @@ def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_no
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
- dml_did_obj = dml.DoubleMLDID(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- in_sample_normalization=in_sample_normalization,
- draw_sample_splitting=False)
+ dml_did_obj = dml.DoubleMLDID(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ in_sample_normalization=in_sample_normalization,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_did_obj.set_sample_splitting(all_smpls=all_smpls)
# tune hyperparameters
- tune_res = dml_did_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_did_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLDID)
dml_did_obj.fit()
@@ -94,66 +88,82 @@ def dml_did_fixture(generate_data_did, learner_g, learner_m, score, in_sample_no
smpls = all_smpls[0]
if tune_on_folds:
- g0_params, g1_params, m_params = tune_nuisance_did(y, x, d,
- clone(learner_g), clone(learner_m), smpls, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_did(
+ y, x, d, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g0_params, g1_params, m_params = tune_nuisance_did(y, x, d,
- clone(learner_g), clone(learner_m), xx, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_did(
+ y, x, d, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
g0_params = g0_params * n_folds
- if score == 'experimental':
+ if score == "experimental":
g1_params = g1_params * n_folds
m_params = None
else:
- assert score == 'observational'
+ assert score == "observational"
g1_params = None
m_params = m_params * n_folds
- res_manual = fit_did(y, x, d, clone(learner_g), clone(learner_m),
- all_smpls, score, in_sample_normalization,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params)
-
- res_dict = {'coef': dml_did_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'se': dml_did_obj.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_did(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ all_smpls,
+ score,
+ in_sample_normalization,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_did_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "se": dml_did_obj.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_did(y, res_manual['thetas'], res_manual['ses'],
- res_manual['all_psi_a'], res_manual['all_psi_b'],
- all_smpls, bootstrap, n_rep_boot)
+ boot_t_stat = boot_did(
+ y,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_psi_a"],
+ res_manual["all_psi_b"],
+ all_smpls,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_did_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_did_coef(dml_did_fixture):
- assert math.isclose(dml_did_fixture['coef'][0],
- dml_did_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_fixture["coef"][0], dml_did_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_se(dml_did_fixture):
- assert math.isclose(dml_did_fixture['se'][0],
- dml_did_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_did_fixture["se"][0], dml_did_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_did_boot(dml_did_fixture):
- for bootstrap in dml_did_fixture['boot_methods']:
- assert np.allclose(dml_did_fixture['boot_t_stat' + bootstrap],
- dml_did_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_did_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_did_fixture["boot_t_stat" + bootstrap],
+ dml_did_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
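For orientation, the tuning workflow driven by the fixture above, condensed into a standalone sketch; the dataset and grids are illustrative stand-ins for the `generate_data_did` fixture and its parametrizations:

```python
# Condensed tuning workflow mirroring the fixture above; the dataset and
# grid values are illustrative assumptions.
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression

import doubleml as dml
from doubleml.datasets import make_did_SZ2020

np.random.seed(3141)
dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData")
dml_did = dml.DoubleMLDID(
    dml_data,
    ml_g=RandomForestRegressor(random_state=42),
    ml_m=LogisticRegression(),
    n_folds=2,
    score="observational",
)

par_grid = {"ml_g": {"n_estimators": [5, 10, 20]}, "ml_m": {"C": np.logspace(-4, 2, 10)}}
# tune() selects hyperparameters per nuisance learner and sets them on the model
dml_did.tune(par_grid, tune_on_folds=True, n_folds_tune=4)
dml_did.fit()
print(dml_did.summary)
```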
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 71f8b4418..e83a9b786 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1,43 +1,36 @@
-import numpy as np
-import pandas as pd
-import warnings
import copy
+import warnings
+from abc import ABC, abstractmethod
-from sklearn.base import is_regressor, is_classifier
-
+import numpy as np
+import pandas as pd
from scipy.stats import norm
-
-from abc import ABC, abstractmethod
+from sklearn.base import is_classifier, is_regressor
from .double_ml_data import DoubleMLBaseData, DoubleMLClusterData
from .double_ml_framework import DoubleMLFramework
-
-from .utils.resampling import DoubleMLResampling, DoubleMLClusterResampling
-from .utils._estimation import _rmse, _aggregate_coefs_and_ses, _var_est, _set_external_predictions
from .utils._checks import _check_external_predictions, _check_sample_splitting
+from .utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
from .utils.gain_statistics import gain_statistics
+from .utils.resampling import DoubleMLClusterResampling, DoubleMLResampling
-_implemented_data_backends = ['DoubleMLData', 'DoubleMLClusterData']
+_implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData"]
class DoubleML(ABC):
- """Double Machine Learning.
- """
-
- def __init__(self,
- obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting):
+ """Double Machine Learning."""
+
+ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
# check and pick up obj_dml_data
if not isinstance(obj_dml_data, DoubleMLBaseData):
- raise TypeError('The data must be of ' + ' or '.join(_implemented_data_backends) + ' type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ "The data must be of " + " or ".join(_implemented_data_backends) + " type. "
+ f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
self._is_cluster_data = False
if isinstance(obj_dml_data, DoubleMLClusterData):
if obj_dml_data.n_cluster_vars > 2:
- raise NotImplementedError('Multi-way (n_ways > 2) clustering not yet implemented.')
+ raise NotImplementedError("Multi-way (n_ways > 2) clustering not yet implemented.")
self._is_cluster_data = True
self._dml_data = obj_dml_data
@@ -67,27 +60,27 @@ def __init__(self,
# check resampling specifications
if not isinstance(n_folds, int):
- raise TypeError('The number of folds must be of int type. '
- f'{str(n_folds)} of type {str(type(n_folds))} was passed.')
+ raise TypeError(
+ f"The number of folds must be of int type. {str(n_folds)} of type {str(type(n_folds))} was passed."
+ )
if n_folds < 1:
- raise ValueError('The number of folds must be positive. '
- f'{str(n_folds)} was passed.')
+ raise ValueError(f"The number of folds must be positive. {str(n_folds)} was passed.")
if not isinstance(n_rep, int):
- raise TypeError('The number of repetitions for the sample splitting must be of int type. '
- f'{str(n_rep)} of type {str(type(n_rep))} was passed.')
+ raise TypeError(
+ "The number of repetitions for the sample splitting must be of int type. "
+ f"{str(n_rep)} of type {str(type(n_rep))} was passed."
+ )
if n_rep < 1:
- raise ValueError('The number of repetitions for the sample splitting must be positive. '
- f'{str(n_rep)} was passed.')
+ raise ValueError(f"The number of repetitions for the sample splitting must be positive. {str(n_rep)} was passed.")
if not isinstance(draw_sample_splitting, bool):
- raise TypeError('draw_sample_splitting must be True or False. '
- f'Got {str(draw_sample_splitting)}.')
+ raise TypeError(f"draw_sample_splitting must be True or False. Got {str(draw_sample_splitting)}.")
# set resampling specifications
if self._is_cluster_data:
self._n_folds_per_cluster = n_folds
- self._n_folds = n_folds ** self._dml_data.n_cluster_vars
+ self._n_folds = n_folds**self._dml_data.n_cluster_vars
else:
self._n_folds = n_folds
self._n_rep = n_rep
@@ -102,8 +95,16 @@ def __init__(self,
self.draw_sample_splitting()
# initialize arrays according to obj_dml_data and the resampling settings
- self._psi, self._psi_deriv, self._psi_elements, self._var_scaling_factors, \
- self._coef, self._se, self._all_coef, self._all_se = self._initialize_arrays()
+ (
+ self._psi,
+ self._psi_deriv,
+ self._psi_elements,
+ self._var_scaling_factors,
+ self._coef,
+ self._se,
+ self._all_coef,
+ self._all_se,
+ ) = self._initialize_arrays()
# initialize instance attributes which are later used for iterating
self._i_rep = None
@@ -111,39 +112,47 @@ def __init__(self,
def __str__(self):
class_name = self.__class__.__name__
- header = f'================== {class_name} Object ==================\n'
+ header = f"================== {class_name} Object ==================\n"
data_summary = self._dml_data._data_summary_str()
- score_info = f'Score function: {str(self.score)}\n'
- learner_info = ''
+ score_info = f"Score function: {str(self.score)}\n"
+ learner_info = ""
for key, value in self.learner.items():
- learner_info += f'Learner {key}: {str(value)}\n'
+ learner_info += f"Learner {key}: {str(value)}\n"
if self.nuisance_loss is not None:
- learner_info += 'Out-of-sample Performance:\n'
+ learner_info += "Out-of-sample Performance:\n"
is_classifier = [value for value in self._is_classifier.values()]
is_regressor = [not value for value in is_classifier]
if any(is_regressor):
- learner_info += 'Regression:\n'
+ learner_info += "Regression:\n"
for learner in [key for key, value in self._is_classifier.items() if value is False]:
- learner_info += f'Learner {learner} RMSE: {self.nuisance_loss[learner]}\n'
+ learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
if any(is_classifier):
- learner_info += 'Classification:\n'
+ learner_info += "Classification:\n"
for learner in [key for key, value in self._is_classifier.items() if value is True]:
- learner_info += f'Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n'
+ learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
if self._is_cluster_data:
- resampling_info = f'No. folds per cluster: {self._n_folds_per_cluster}\n' \
- f'No. folds: {self.n_folds}\n' \
- f'No. repeated sample splits: {self.n_rep}\n'
+ resampling_info = (
+ f"No. folds per cluster: {self._n_folds_per_cluster}\n"
+ f"No. folds: {self.n_folds}\n"
+ f"No. repeated sample splits: {self.n_rep}\n"
+ )
else:
- resampling_info = f'No. folds: {self.n_folds}\n' \
- f'No. repeated sample splits: {self.n_rep}\n'
+ resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
fit_summary = str(self.summary)
- res = header + \
- '\n------------------ Data summary ------------------\n' + data_summary + \
- '\n------------------ Score & algorithm ------------------\n' + score_info + \
- '\n------------------ Machine learner ------------------\n' + learner_info + \
- '\n------------------ Resampling ------------------\n' + resampling_info + \
- '\n------------------ Fit summary ------------------\n' + fit_summary
+ res = (
+ header
+ + "\n------------------ Data summary ------------------\n"
+ + data_summary
+ + "\n------------------ Score & algorithm ------------------\n"
+ + score_info
+ + "\n------------------ Machine learner ------------------\n"
+ + learner_info
+ + "\n------------------ Resampling ------------------\n"
+ + resampling_info
+ + "\n------------------ Fit summary ------------------\n"
+ + fit_summary
+ )
return res
@property
@@ -269,8 +278,14 @@ def get_params(self, learner):
"""
valid_learner = self.params_names
if (not isinstance(learner, str)) | (learner not in valid_learner):
- raise ValueError('Invalid nuisance learner ' + str(learner) + '. ' +
- 'Valid nuisance learner ' + ' or '.join(valid_learner) + '.')
+ raise ValueError(
+ "Invalid nuisance learner "
+ + str(learner)
+ + ". "
+ + "Valid nuisance learner "
+ + " or ".join(valid_learner)
+ + "."
+ )
return self._params[learner]
# The private function _get_params delivers the single treatment, single (cross-fitting) sample subselection.
@@ -286,8 +301,10 @@ def smpls(self):
The partition used for cross-fitting.
"""
if self._smpls is None:
- err_msg = ('Sample splitting not specified. Either draw samples via .draw_sample splitting() ' +
- 'or set external samples via .set_sample_splitting().')
+ err_msg = (
+ "Sample splitting not specified. Either draw samples via .draw_sample splitting() "
+ + "or set external samples via .set_sample_splitting()."
+ )
raise ValueError(err_msg)
return self._smpls
@@ -418,16 +435,12 @@ def summary(self):
"""
A summary for the estimated causal effect after calling :meth:`fit`.
"""
- col_names = ['coef', 'std err', 't', 'P>|t|']
+ col_names = ["coef", "std err", "t", "P>|t|"]
if np.isnan(self.coef).all():
df_summary = pd.DataFrame(columns=col_names)
else:
- summary_stats = np.transpose(np.vstack(
- [self.coef, self.se,
- self.t_stat, self.pval]))
- df_summary = pd.DataFrame(summary_stats,
- columns=col_names,
- index=self._dml_data.d_cols)
+ summary_stats = np.transpose(np.vstack([self.coef, self.se, self.t_stat, self.pval]))
+ df_summary = pd.DataFrame(summary_stats, columns=col_names, index=self._dml_data.d_cols)
ci = self.confint()
df_summary = df_summary.join(ci)
return df_summary
@@ -501,10 +514,8 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None,
# predictions have to be stored in loop for sensitivity analysis
nuisance_predictions = self._fit_nuisance_and_score_elements(
- n_jobs_cv,
- store_predictions,
- external_predictions,
- store_models)
+ n_jobs_cv, store_predictions, external_predictions, store_models
+ )
self._solve_score_and_estimate_se()
@@ -535,42 +546,46 @@ def construct_framework(self):
scaled_psi_reshape = np.transpose(scaled_psi, (0, 2, 1))
doubleml_dict = {
- 'thetas': self.coef,
- 'all_thetas': self.all_coef,
- 'ses': self.se,
- 'all_ses': self.all_se,
- 'var_scaling_factors': self._var_scaling_factors,
- 'scaled_psi': scaled_psi_reshape,
- 'is_cluster_data': self._is_cluster_data
+ "thetas": self.coef,
+ "all_thetas": self.all_coef,
+ "ses": self.se,
+ "all_ses": self.all_se,
+ "var_scaling_factors": self._var_scaling_factors,
+ "scaled_psi": scaled_psi_reshape,
+ "is_cluster_data": self._is_cluster_data,
}
if self._sensitivity_implemented:
# reshape sensitivity elements to (n_obs, n_coefs, n_rep)
- doubleml_dict.update({
- 'sensitivity_elements': {
- 'sigma2': np.transpose(self.sensitivity_elements['sigma2'], (0, 2, 1)),
- 'nu2': np.transpose(self.sensitivity_elements['nu2'], (0, 2, 1)),
- 'psi_sigma2': np.transpose(self.sensitivity_elements['psi_sigma2'], (0, 2, 1)),
- 'psi_nu2': np.transpose(self.sensitivity_elements['psi_nu2'], (0, 2, 1)),
- 'riesz_rep': np.transpose(self.sensitivity_elements['riesz_rep'], (0, 2, 1))
+ doubleml_dict.update(
+ {
+ "sensitivity_elements": {
+ "sigma2": np.transpose(self.sensitivity_elements["sigma2"], (0, 2, 1)),
+ "nu2": np.transpose(self.sensitivity_elements["nu2"], (0, 2, 1)),
+ "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
+ "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
+ "riesz_rep": np.transpose(self.sensitivity_elements["riesz_rep"], (0, 2, 1)),
+ }
}
- })
+ )
if self._is_cluster_data:
- doubleml_dict.update({
- 'is_cluster_data': True,
- 'cluster_dict': {
- 'smpls': self._smpls,
- 'smpls_cluster': self._smpls_cluster,
- 'cluster_vars': self._dml_data.cluster_vars,
- 'n_folds_per_cluster': self._n_folds_per_cluster,
+ doubleml_dict.update(
+ {
+ "is_cluster_data": True,
+ "cluster_dict": {
+ "smpls": self._smpls,
+ "smpls_cluster": self._smpls_cluster,
+ "cluster_vars": self._dml_data.cluster_vars,
+ "n_folds_per_cluster": self._n_folds_per_cluster,
+ },
}
- })
+ )
doubleml_framework = DoubleMLFramework(doubleml_dict)
return doubleml_framework
- def bootstrap(self, method='normal', n_rep_boot=500):
+ def bootstrap(self, method="normal", n_rep_boot=500):
"""
Multiplier bootstrap for DoubleML models.
@@ -588,7 +603,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
self : object
"""
if self._framework is None:
- raise ValueError('Apply fit() before bootstrap().')
+ raise ValueError("Apply fit() before bootstrap().")
self._framework.bootstrap(method=method, n_rep_boot=n_rep_boot)
return self
@@ -614,14 +629,14 @@ def confint(self, joint=False, level=0.95):
"""
if self.framework is None:
- raise ValueError('Apply fit() before confint().')
+ raise ValueError("Apply fit() before confint().")
df_ci = self.framework.confint(joint=joint, level=level)
df_ci.set_index(pd.Index(self._dml_data.d_cols), inplace=True)
return df_ci
- def p_adjust(self, method='romano-wolf'):
+ def p_adjust(self, method="romano-wolf"):
"""
Multiple testing adjustment for DoubleML models.
@@ -640,23 +655,25 @@ def p_adjust(self, method='romano-wolf'):
"""
if self.framework is None:
- raise ValueError('Apply fit() before p_adjust().')
+ raise ValueError("Apply fit() before p_adjust().")
p_val, _ = self.framework.p_adjust(method=method)
p_val.set_index(pd.Index(self._dml_data.d_cols), inplace=True)
return p_val
- def tune(self,
- param_grids,
- tune_on_folds=False,
- scoring_methods=None, # if None the estimator's score method is used
- n_folds_tune=5,
- search_mode='grid_search',
- n_iter_randomized_search=100,
- n_jobs_cv=None,
- set_as_params=True,
- return_tune_res=False):
+ def tune(
+ self,
+ param_grids,
+ tune_on_folds=False,
+ scoring_methods=None, # if None the estimator's score method is used
+ n_folds_tune=5,
+ search_mode="grid_search",
+ n_iter_randomized_search=100,
+ n_jobs_cv=None,
+ set_as_params=True,
+ return_tune_res=False,
+ ):
"""
Hyperparameter-tuning for DoubleML models.
@@ -715,14 +732,22 @@ def tune(self,
"""
if (not isinstance(param_grids, dict)) | (not all(k in param_grids for k in self.learner_names)):
- raise ValueError('Invalid param_grids ' + str(param_grids) + '. '
- 'param_grids must be a dictionary with keys ' + ' and '.join(self.learner_names) + '.')
+ raise ValueError(
+ "Invalid param_grids " + str(param_grids) + ". "
+ "param_grids must be a dictionary with keys " + " and ".join(self.learner_names) + "."
+ )
if scoring_methods is not None:
if (not isinstance(scoring_methods, dict)) | (not all(k in self.learner_names for k in scoring_methods)):
- raise ValueError('Invalid scoring_methods ' + str(scoring_methods) + '. ' +
- 'scoring_methods must be a dictionary. ' +
- 'Valid keys are ' + ' and '.join(self.learner_names) + '.')
+ raise ValueError(
+ "Invalid scoring_methods "
+ + str(scoring_methods)
+ + ". "
+ + "scoring_methods must be a dictionary. "
+ + "Valid keys are "
+ + " and ".join(self.learner_names)
+ + "."
+ )
if not all(k in scoring_methods for k in self.learner_names):
# if there are learners for which no scoring_method was set, we fall back to None, i.e., default scoring
for learner in self.learner_names:
@@ -730,40 +755,43 @@ def tune(self,
scoring_methods[learner] = None
if not isinstance(tune_on_folds, bool):
- raise TypeError('tune_on_folds must be True or False. '
- f'Got {str(tune_on_folds)}.')
+ raise TypeError(f"tune_on_folds must be True or False. Got {str(tune_on_folds)}.")
if not isinstance(n_folds_tune, int):
- raise TypeError('The number of folds used for tuning must be of int type. '
- f'{str(n_folds_tune)} of type {str(type(n_folds_tune))} was passed.')
+ raise TypeError(
+ "The number of folds used for tuning must be of int type. "
+ f"{str(n_folds_tune)} of type {str(type(n_folds_tune))} was passed."
+ )
if n_folds_tune < 2:
- raise ValueError('The number of folds used for tuning must be at least two. '
- f'{str(n_folds_tune)} was passed.')
+ raise ValueError(f"The number of folds used for tuning must be at least two. {str(n_folds_tune)} was passed.")
- if (not isinstance(search_mode, str)) | (search_mode not in ['grid_search', 'randomized_search']):
- raise ValueError('search_mode must be "grid_search" or "randomized_search". '
- f'Got {str(search_mode)}.')
+ if (not isinstance(search_mode, str)) | (search_mode not in ["grid_search", "randomized_search"]):
+ raise ValueError(f'search_mode must be "grid_search" or "randomized_search". Got {str(search_mode)}.')
if not isinstance(n_iter_randomized_search, int):
- raise TypeError('The number of parameter settings sampled for the randomized search must be of int type. '
- f'{str(n_iter_randomized_search)} of type '
- f'{str(type(n_iter_randomized_search))} was passed.')
+ raise TypeError(
+ "The number of parameter settings sampled for the randomized search must be of int type. "
+ f"{str(n_iter_randomized_search)} of type "
+ f"{str(type(n_iter_randomized_search))} was passed."
+ )
if n_iter_randomized_search < 2:
- raise ValueError('The number of parameter settings sampled for the randomized search must be at least two. '
- f'{str(n_iter_randomized_search)} was passed.')
+ raise ValueError(
+ "The number of parameter settings sampled for the randomized search must be at least two. "
+ f"{str(n_iter_randomized_search)} was passed."
+ )
if n_jobs_cv is not None:
if not isinstance(n_jobs_cv, int):
- raise TypeError('The number of CPUs used to fit the learners must be of int type. '
- f'{str(n_jobs_cv)} of type {str(type(n_jobs_cv))} was passed.')
+ raise TypeError(
+ "The number of CPUs used to fit the learners must be of int type. "
+ f"{str(n_jobs_cv)} of type {str(type(n_jobs_cv))} was passed."
+ )
if not isinstance(set_as_params, bool):
- raise TypeError('set_as_params must be True or False. '
- f'Got {str(set_as_params)}.')
+ raise TypeError(f"set_as_params must be True or False. Got {str(set_as_params)}.")
if not isinstance(return_tune_res, bool):
- raise TypeError('return_tune_res must be True or False. '
- f'Got {str(return_tune_res)}.')
+ raise TypeError(f"return_tune_res must be True or False. Got {str(return_tune_res)}.")
if tune_on_folds:
tuning_res = [[None] * self.n_rep] * self._dml_data.n_treat
@@ -782,14 +810,18 @@ def tune(self,
self._i_rep = i_rep
# tune hyperparameters
- res = self._nuisance_tuning(self.__smpls,
- param_grids, scoring_methods,
- n_folds_tune,
- n_jobs_cv,
- search_mode, n_iter_randomized_search)
+ res = self._nuisance_tuning(
+ self.__smpls,
+ param_grids,
+ scoring_methods,
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
tuning_res[i_rep][i_d] = res
- nuisance_params.append(res['params'])
+ nuisance_params.append(res["params"])
if set_as_params:
for nuisance_model in nuisance_params[0].keys():
@@ -799,16 +831,14 @@ def tune(self,
else:
smpls = [(np.arange(self._dml_data.n_obs), np.arange(self._dml_data.n_obs))]
# tune hyperparameters
- res = self._nuisance_tuning(smpls,
- param_grids, scoring_methods,
- n_folds_tune,
- n_jobs_cv,
- search_mode, n_iter_randomized_search)
+ res = self._nuisance_tuning(
+ smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ )
tuning_res[i_d] = res
if set_as_params:
- for nuisance_model in res['params'].keys():
- params = res['params'][nuisance_model]
+ for nuisance_model in res["params"].keys():
+ params = res["params"][nuisance_model]
self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params[0])
if return_tune_res:
@@ -838,12 +868,19 @@ def set_ml_nuisance_params(self, learner, treat_var, params):
"""
valid_learner = self.params_names
if learner not in valid_learner:
- raise ValueError('Invalid nuisance learner ' + learner + '. ' +
- 'Valid nuisance learner ' + ' or '.join(valid_learner) + '.')
+ raise ValueError(
+ "Invalid nuisance learner " + learner + ". " + "Valid nuisance learner " + " or ".join(valid_learner) + "."
+ )
if treat_var not in self._dml_data.d_cols:
- raise ValueError('Invalid treatment variable ' + treat_var + '. ' +
- 'Valid treatment variable ' + ' or '.join(self._dml_data.d_cols) + '.')
+ raise ValueError(
+ "Invalid treatment variable "
+ + treat_var
+ + ". "
+ + "Valid treatment variable "
+ + " or ".join(self._dml_data.d_cols)
+ + "."
+ )
if params is None:
all_params = [None] * self.n_rep
@@ -869,24 +906,25 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions):
pass
@abstractmethod
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
pass
@staticmethod
def _check_learner(learner, learner_name, regressor, classifier):
- err_msg_prefix = f'Invalid learner provided for {learner_name}: '
- warn_msg_prefix = f'Learner provided for {learner_name} is probably invalid: '
+ err_msg_prefix = f"Invalid learner provided for {learner_name}: "
+ warn_msg_prefix = f"Learner provided for {learner_name} is probably invalid: "
if isinstance(learner, type):
- raise TypeError(err_msg_prefix + 'provide an instance of a learner instead of a class.')
+ raise TypeError(err_msg_prefix + "provide an instance of a learner instead of a class.")
- if not hasattr(learner, 'fit'):
- raise TypeError(err_msg_prefix + f'{str(learner)} has no method .fit().')
- if not hasattr(learner, 'set_params'):
- raise TypeError(err_msg_prefix + f'{str(learner)} has no method .set_params().')
- if not hasattr(learner, 'get_params'):
- raise TypeError(err_msg_prefix + f'{str(learner)} has no method .get_params().')
+ if not hasattr(learner, "fit"):
+ raise TypeError(err_msg_prefix + f"{str(learner)} has no method .fit().")
+ if not hasattr(learner, "set_params"):
+ raise TypeError(err_msg_prefix + f"{str(learner)} has no method .set_params().")
+ if not hasattr(learner, "get_params"):
+ raise TypeError(err_msg_prefix + f"{str(learner)} has no method .get_params().")
if regressor & classifier:
if is_classifier(learner):
@@ -894,50 +932,55 @@ def _check_learner(learner, learner_name, regressor, classifier):
elif is_regressor(learner):
learner_is_classifier = False
else:
- warnings.warn(warn_msg_prefix + f'{str(learner)} is (probably) neither a regressor nor a classifier. ' +
- 'Method predict is used for prediction.')
+ warnings.warn(
+ warn_msg_prefix
+ + f"{str(learner)} is (probably) neither a regressor nor a classifier. "
+ + "Method predict is used for prediction."
+ )
learner_is_classifier = False
elif classifier:
if not is_classifier(learner):
- warnings.warn(warn_msg_prefix + f'{str(learner)} is (probably) no classifier.')
+ warnings.warn(warn_msg_prefix + f"{str(learner)} is (probably) no classifier.")
learner_is_classifier = True
else:
assert regressor # classifier, regressor or both must be True
if not is_regressor(learner):
- warnings.warn(warn_msg_prefix + f'{str(learner)} is (probably) no regressor.')
+ warnings.warn(warn_msg_prefix + f"{str(learner)} is (probably) no regressor.")
learner_is_classifier = False
# check existence of the prediction method
if learner_is_classifier:
- if not hasattr(learner, 'predict_proba'):
- raise TypeError(err_msg_prefix + f'{str(learner)} has no method .predict_proba().')
+ if not hasattr(learner, "predict_proba"):
+ raise TypeError(err_msg_prefix + f"{str(learner)} has no method .predict_proba().")
else:
- if not hasattr(learner, 'predict'):
- raise TypeError(err_msg_prefix + f'{str(learner)} has no method .predict().')
+ if not hasattr(learner, "predict"):
+ raise TypeError(err_msg_prefix + f"{str(learner)} has no method .predict().")
return learner_is_classifier
def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_models):
if n_jobs_cv is not None:
if not isinstance(n_jobs_cv, int):
- raise TypeError('The number of CPUs used to fit the learners must be of int type. '
- f'{str(n_jobs_cv)} of type {str(type(n_jobs_cv))} was passed.')
+ raise TypeError(
+ "The number of CPUs used to fit the learners must be of int type. "
+ f"{str(n_jobs_cv)} of type {str(type(n_jobs_cv))} was passed."
+ )
if not isinstance(store_predictions, bool):
- raise TypeError('store_predictions must be True or False. '
- f'Got {str(store_predictions)}.')
+ raise TypeError(f"store_predictions must be True or False. Got {str(store_predictions)}.")
if not isinstance(store_models, bool):
- raise TypeError('store_models must be True or False. '
- f'Got {str(store_models)}.')
+ raise TypeError(f"store_models must be True or False. Got {str(store_models)}.")
# check if external predictions are implemented
if self._external_predictions_implemented:
- _check_external_predictions(external_predictions=external_predictions,
- valid_treatments=self._dml_data.d_cols,
- valid_learners=self.params_names,
- n_obs=self._dml_data.n_obs,
- n_rep=self.n_rep)
+ _check_external_predictions(
+ external_predictions=external_predictions,
+ valid_treatments=self._dml_data.d_cols,
+ valid_learners=self.params_names,
+ n_obs=self._dml_data.n_obs,
+ n_rep=self.n_rep,
+ )
elif not self._external_predictions_implemented and external_predictions is not None:
raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.")
@@ -952,46 +995,46 @@ def _initalize_fit(self, store_predictions, store_models):
self._initialize_models()
if self._sensitivity_implemented:
- self._sensitivity_elements = self._initialize_sensitivity_elements((self._dml_data.n_obs,
- self.n_rep,
- self._dml_data.n_coefs))
+ self._sensitivity_elements = self._initialize_sensitivity_elements(
+ (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs)
+ )
def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models):
- ext_prediction_dict = _set_external_predictions(external_predictions,
- learners=self.params_names,
- treatment=self._dml_data.d_cols[self._i_treat],
- i_rep=self._i_rep)
+ ext_prediction_dict = _set_external_predictions(
+ external_predictions, learners=self.params_names, treatment=self._dml_data.d_cols[self._i_treat], i_rep=self._i_rep
+ )
# ml estimation of nuisance models and computation of score elements
- score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv,
- external_predictions=ext_prediction_dict,
- return_models=store_models)
+ score_elements, preds = self._nuisance_est(
+ self.__smpls, n_jobs_cv, external_predictions=ext_prediction_dict, return_models=store_models
+ )
self._set_score_elements(score_elements, self._i_rep, self._i_treat)
# calculate nuisance losses and store predictions and targets of the nuisance models
- self._calc_nuisance_loss(preds['predictions'], preds['targets'])
+ self._calc_nuisance_loss(preds["predictions"], preds["targets"])
if store_predictions:
- self._store_predictions_and_targets(preds['predictions'], preds['targets'])
+ self._store_predictions_and_targets(preds["predictions"], preds["targets"])
if store_models:
- self._store_models(preds['models'])
+ self._store_models(preds["models"])
return preds
def _solve_score_and_estimate_se(self):
# estimate the causal parameter
- self._all_coef[self._i_treat, self._i_rep] = \
- self._est_causal_pars(self._get_score_elements(self._i_rep, self._i_treat))
+ self._all_coef[self._i_treat, self._i_rep] = self._est_causal_pars(
+ self._get_score_elements(self._i_rep, self._i_treat)
+ )
# compute score (depends on the estimated causal parameter)
self._psi[:, self._i_rep, self._i_treat] = self._compute_score(
- self._get_score_elements(self._i_rep, self._i_treat),
- self._all_coef[self._i_treat, self._i_rep])
+ self._get_score_elements(self._i_rep, self._i_treat), self._all_coef[self._i_treat, self._i_rep]
+ )
# compute score derivative (can depend on the estimated causal parameter)
self._psi_deriv[:, self._i_rep, self._i_treat] = self._compute_score_deriv(
- self._get_score_elements(self._i_rep, self._i_treat),
- self._all_coef[self._i_treat, self._i_rep])
+ self._get_score_elements(self._i_rep, self._i_treat), self._all_coef[self._i_treat, self._i_rep]
+ )
# compute standard errors for causal parameter
self._all_se[self._i_treat, self._i_rep], self._var_scaling_factors[self._i_treat] = self._se_causal_pars()
@@ -999,7 +1042,7 @@ def _solve_score_and_estimate_se(self):
def _fit_sensitivity_elements(self, nuisance_predictions):
if self._sensitivity_implemented:
if callable(self.score):
- warnings.warn('Sensitivity analysis not implemented for callable scores.')
+ warnings.warn("Sensitivity analysis not implemented for callable scores.")
else:
# compute sensitivity analysis elements
element_dict = self._sensitivity_element_est(nuisance_predictions)
@@ -1023,20 +1066,22 @@ def _initialize_arrays(self):
return psi, psi_deriv, psi_elements, var_scaling_factors, coef, se, all_coef, all_se
def _initialize_predictions_and_targets(self):
- self._predictions = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
- for learner in self.params_names}
- self._nuisance_targets = {learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
- for learner in self.params_names}
-
- def _initialize_nuisance_loss(self):
- self._nuisance_loss = {
- learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan)
+ self._predictions = {
+ learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
+ for learner in self.params_names
+ }
+ self._nuisance_targets = {
+ learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan)
for learner in self.params_names
}
+ def _initialize_nuisance_loss(self):
+ self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names}
+
def _initialize_models(self):
- self._models = {learner: {treat_var: [None] * self.n_rep for treat_var in self._dml_data.d_cols}
- for learner in self.params_names}
+ self._models = {
+ learner: {treat_var: [None] * self.n_rep for treat_var in self._dml_data.d_cols} for learner in self.params_names
+ }
def _store_predictions_and_targets(self, preds, targets):
for learner in self.params_names:
@@ -1050,9 +1095,7 @@ def _calc_nuisance_loss(self, preds, targets):
learner_keys = [key for key in self._learner.keys() if key in learner]
assert len(learner_keys) == 1
self._is_classifier[learner] = self._check_learner(
- self._learner[learner_keys[0]],
- learner,
- regressor=True, classifier=True
+ self._learner[learner_keys[0]], learner, regressor=True, classifier=True
)
if targets[learner] is None:
@@ -1124,27 +1167,28 @@ def evaluate_learners(self, learners=None, metric=_rmse):
# check metric
if not callable(metric):
- raise TypeError('metric should be a callable. '
- '%r was passed.' % metric)
+ raise TypeError("metric should be a callable. %r was passed." % metric)
if all(learner in self.params_names for learner in learners):
if self.nuisance_targets is None:
- raise ValueError('Apply fit() before evaluate_learners().')
+ raise ValueError("Apply fit() before evaluate_learners().")
else:
- dist = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan)
- for learner in learners}
+ dist = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in learners}
for learner in learners:
for rep in range(self.n_rep):
for coef_idx in range(self._dml_data.n_coefs):
- res = metric(y_pred=self.predictions[learner][:, rep, coef_idx].reshape(1, -1),
- y_true=self.nuisance_targets[learner][:, rep, coef_idx].reshape(1, -1))
+ res = metric(
+ y_pred=self.predictions[learner][:, rep, coef_idx].reshape(1, -1),
+ y_true=self.nuisance_targets[learner][:, rep, coef_idx].reshape(1, -1),
+ )
if not np.isfinite(res):
- raise ValueError(f'Evaluation from learner {str(learner)} is not finite.')
+ raise ValueError(f"Evaluation from learner {str(learner)} is not finite.")
dist[learner][rep, coef_idx] = res
return dist
else:
- raise ValueError(f'The learners have to be a subset of {str(self.params_names)}. '
- f'Learners {str(learners)} provided.')
+ raise ValueError(
+ f"The learners have to be a subset of {str(self.params_names)}. Learners {str(learners)} provided."
+ )
def draw_sample_splitting(self):
"""
@@ -1158,17 +1202,18 @@ def draw_sample_splitting(self):
self : object
"""
if self._is_cluster_data:
- obj_dml_resampling = DoubleMLClusterResampling(n_folds=self._n_folds_per_cluster,
- n_rep=self.n_rep,
- n_obs=self._dml_data.n_obs,
- n_cluster_vars=self._dml_data.n_cluster_vars,
- cluster_vars=self._dml_data.cluster_vars)
+ obj_dml_resampling = DoubleMLClusterResampling(
+ n_folds=self._n_folds_per_cluster,
+ n_rep=self.n_rep,
+ n_obs=self._dml_data.n_obs,
+ n_cluster_vars=self._dml_data.n_cluster_vars,
+ cluster_vars=self._dml_data.cluster_vars,
+ )
self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples()
else:
- obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds,
- n_rep=self.n_rep,
- n_obs=self._dml_data.n_obs,
- stratify=self._strata)
+ obj_dml_resampling = DoubleMLResampling(
+ n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._dml_data.n_obs, stratify=self._strata
+ )
self._smpls = obj_dml_resampling.split_samples()
return self
@@ -1234,10 +1279,19 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
>>> dml_plr_obj.set_sample_splitting(smpls)
"""
self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
- all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data)
+ all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data
+ )
- self._psi, self._psi_deriv, self._psi_elements, self._var_scaling_factors, \
- self._coef, self._se, self._all_coef, self._all_se = self._initialize_arrays()
+ (
+ self._psi,
+ self._psi_deriv,
+ self._psi_elements,
+ self._var_scaling_factors,
+ self._coef,
+ self._se,
+ self._all_coef,
+ self._all_se,
+ ) = self._initialize_arrays()
self._initialize_ml_nuisance_params()
return self
@@ -1248,10 +1302,10 @@ def _est_causal_pars(self, psi_elements):
if not self._is_cluster_data:
coef = self._est_coef(psi_elements)
else:
- scaling_factor = [1.] * len(smpls)
+ scaling_factor = [1.0] * len(smpls)
for i_fold, (_, _) in enumerate(smpls):
test_cluster_inds = self.__smpls_cluster[i_fold][1]
- scaling_factor[i_fold] = 1./np.prod(np.array([len(inds) for inds in test_cluster_inds]))
+ scaling_factor[i_fold] = 1.0 / np.prod(np.array([len(inds) for inds in test_cluster_inds]))
coef = self._est_coef(psi_elements, smpls=smpls, scaling_factor=scaling_factor)
return coef
@@ -1267,13 +1321,15 @@ def _se_causal_pars(self):
smpls_cluster = self.__smpls_cluster
n_folds_per_cluster = self._n_folds_per_cluster
- sigma2_hat, var_scaling_factor = _var_est(psi=self.__psi,
- psi_deriv=self.__psi_deriv,
- smpls=self.__smpls,
- is_cluster_data=self._is_cluster_data,
- cluster_vars=cluster_vars,
- smpls_cluster=smpls_cluster,
- n_folds_per_cluster=n_folds_per_cluster)
+ sigma2_hat, var_scaling_factor = _var_est(
+ psi=self.__psi,
+ psi_deriv=self.__psi_deriv,
+ smpls=self.__smpls,
+ is_cluster_data=self._is_cluster_data,
+ cluster_vars=cluster_vars,
+ smpls_cluster=smpls_cluster,
+ n_folds_per_cluster=n_folds_per_cluster,
+ )
se = np.sqrt(sigma2_hat)
return se, var_scaling_factor
@@ -1286,18 +1342,19 @@ def _est_causal_pars_and_se(self):
self._i_treat = i_d
# estimate the causal parameter
- self._all_coef[self._i_treat, self._i_rep] = \
- self._est_causal_pars(self._get_score_elements(self._i_rep, self._i_treat))
+ self._all_coef[self._i_treat, self._i_rep] = self._est_causal_pars(
+ self._get_score_elements(self._i_rep, self._i_treat)
+ )
# compute score (depends on the estimated causal parameter)
self._psi[:, self._i_rep, self._i_treat] = self._compute_score(
- self._get_score_elements(self._i_rep, self._i_treat),
- self._all_coef[self._i_treat, self._i_rep])
+ self._get_score_elements(self._i_rep, self._i_treat), self._all_coef[self._i_treat, self._i_rep]
+ )
# compute score (can depend on the estimated causal parameter)
self._psi_deriv[:, self._i_rep, self._i_treat] = self._compute_score_deriv(
- self._get_score_elements(self._i_rep, self._i_treat),
- self._all_coef[self._i_treat, self._i_rep])
+ self._get_score_elements(self._i_rep, self._i_treat), self._all_coef[self._i_treat, self._i_rep]
+ )
# compute standard errors for causal parameter
self._all_se[self._i_treat, self._i_rep], self._var_scaling_factors[self._i_treat] = self._se_causal_pars()
@@ -1329,12 +1386,15 @@ def _get_score_elements(self, i_rep, i_treat):
def _set_score_elements(self, psi_elements, i_rep, i_treat):
if not isinstance(psi_elements, dict):
- raise TypeError('_ml_nuisance_and_score_elements must return score elements in a dict. '
- f'Got type {str(type(psi_elements))}.')
+ raise TypeError(
+ f"_ml_nuisance_and_score_elements must return score elements in a dict. Got type {str(type(psi_elements))}."
+ )
if not (set(self._score_element_names) == set(psi_elements.keys())):
- raise ValueError('_ml_nuisance_and_score_elements returned incomplete score elements. '
- 'Expected dict with keys: ' + ' and '.join(set(self._score_element_names)) + '.'
- 'Got dict with keys: ' + ' and '.join(set(psi_elements.keys())) + '.')
+ raise ValueError(
+ "_ml_nuisance_and_score_elements returned incomplete score elements. "
+ "Expected dict with keys: " + " and ".join(set(self._score_element_names)) + "."
+ "Got dict with keys: " + " and ".join(set(psi_elements.keys())) + "."
+ )
for key in self._score_element_names:
self.psi_elements[key][:, i_rep, i_treat] = psi_elements[key]
return
@@ -1350,15 +1410,17 @@ def _sensitivity_element_est(self, preds):
@property
def _sensitivity_element_names(self):
- return ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep']
+ return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]
# the dimensions will usually be (n_obs, n_rep, n_coefs) to be equal to the score dimensions psi
def _initialize_sensitivity_elements(self, score_dim):
- sensitivity_elements = {'sigma2': np.full((1, score_dim[1], score_dim[2]), np.nan),
- 'nu2': np.full((1, score_dim[1], score_dim[2]), np.nan),
- 'psi_sigma2': np.full(score_dim, np.nan),
- 'psi_nu2': np.full(score_dim, np.nan),
- 'riesz_rep': np.full(score_dim, np.nan)}
+ sensitivity_elements = {
+ "sigma2": np.full((1, score_dim[1], score_dim[2]), np.nan),
+ "nu2": np.full((1, score_dim[1], score_dim[2]), np.nan),
+ "psi_sigma2": np.full(score_dim, np.nan),
+ "psi_nu2": np.full(score_dim, np.nan),
+ "riesz_rep": np.full(score_dim, np.nan),
+ }
return sensitivity_elements
def _get_sensitivity_elements(self, i_rep, i_treat):
@@ -1367,12 +1429,16 @@ def _get_sensitivity_elements(self, i_rep, i_treat):
def _set_sensitivity_elements(self, sensitivity_elements, i_rep, i_treat):
if not isinstance(sensitivity_elements, dict):
- raise TypeError('_sensitivity_element_est must return sensitivity elements in a dict. '
- f'Got type {str(type(sensitivity_elements))}.')
+ raise TypeError(
+ "_sensitivity_element_est must return sensitivity elements in a dict. "
+ f"Got type {str(type(sensitivity_elements))}."
+ )
if not (set(self._sensitivity_element_names) == set(sensitivity_elements.keys())):
- raise ValueError('_sensitivity_element_est returned incomplete sensitivity elements. '
- 'Expected dict with keys: ' + ' and '.join(set(self._sensitivity_element_names)) + '. '
- 'Got dict with keys: ' + ' and '.join(set(sensitivity_elements.keys())) + '.')
+ raise ValueError(
+ "_sensitivity_element_est returned incomplete sensitivity elements. "
+ "Expected dict with keys: " + " and ".join(set(self._sensitivity_element_names)) + ". "
+ "Got dict with keys: " + " and ".join(set(sensitivity_elements.keys())) + "."
+ )
for key in self._sensitivity_element_names:
self.sensitivity_elements[key][:, i_rep, i_treat] = sensitivity_elements[key]
return
@@ -1415,14 +1481,8 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
"""
if self._framework is None:
- raise ValueError('Apply fit() before sensitivity_analysis().')
- self._framework.sensitivity_analysis(
- cf_y=cf_y,
- cf_d=cf_d,
- rho=rho,
- level=level,
- null_hypothesis=null_hypothesis
- )
+ raise ValueError("Apply fit() before sensitivity_analysis().")
+ self._framework.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level, null_hypothesis=null_hypothesis)
return self
@@ -1437,13 +1497,24 @@ def sensitivity_summary(self):
Summary for the sensitivity analysis.
"""
if self._framework is None:
- raise ValueError('Apply sensitivity_analysis() before sensitivity_summary.')
+ raise ValueError("Apply sensitivity_analysis() before sensitivity_summary.")
else:
sensitivity_summary = self._framework.sensitivity_summary
return sensitivity_summary
- def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0,
- include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100):
+ def sensitivity_plot(
+ self,
+ idx_treatment=0,
+ value="theta",
+ rho=1.0,
+ level=0.95,
+ null_hypothesis=0.0,
+ include_scenario=True,
+ benchmarks=None,
+ fill=True,
+ grid_bounds=(0.15, 0.15),
+ grid_size=100,
+ ):
"""
         Contour plot of the sensitivity with respect to latent/confounding variables.
@@ -1497,7 +1568,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
Plotly figure of the sensitivity contours.
"""
if self._framework is None:
- raise ValueError('Apply fit() before sensitivity_plot().')
+ raise ValueError("Apply fit() before sensitivity_plot().")
fig = self._framework.sensitivity_plot(
idx_treatment=idx_treatment,
value=value,
@@ -1508,7 +1579,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
benchmarks=benchmarks,
fill=fill,
grid_bounds=grid_bounds,
- grid_size=grid_size
+ grid_size=grid_size,
)
return fig
@@ -1526,18 +1597,20 @@ def sensitivity_benchmark(self, benchmarking_set, fit_args=None):
# input checks
if self._sensitivity_elements is None:
- raise NotImplementedError(f'Sensitivity analysis not yet implemented for {self.__class__.__name__}.')
+ raise NotImplementedError(f"Sensitivity analysis not yet implemented for {self.__class__.__name__}.")
if not isinstance(benchmarking_set, list):
- raise TypeError('benchmarking_set must be a list. '
- f'{str(benchmarking_set)} of type {type(benchmarking_set)} was passed.')
+ raise TypeError(
+ f"benchmarking_set must be a list. {str(benchmarking_set)} of type {type(benchmarking_set)} was passed."
+ )
if len(benchmarking_set) == 0:
- raise ValueError('benchmarking_set must not be empty.')
+ raise ValueError("benchmarking_set must not be empty.")
if not set(benchmarking_set) <= set(x_list_long):
- raise ValueError(f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
- f'{str(benchmarking_set)} was passed.')
+ raise ValueError(
+ f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
+ f"{str(benchmarking_set)} was passed."
+ )
if fit_args is not None and not isinstance(fit_args, dict):
- raise TypeError('fit_args must be a dict. '
- f'{str(fit_args)} of type {type(fit_args)} was passed.')
+ raise TypeError(f"fit_args must be a dict. {str(fit_args)} of type {type(fit_args)} was passed.")
# refit short form of the model
x_list_short = [x for x in x_list_long if x not in benchmarking_set]
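Taken together, the methods reformatted in this file compose into the usual public workflow: `fit()`, `bootstrap()`, `confint()`, `p_adjust()`, and the sensitivity tooling. A minimal end-to-end sketch, assuming a PLR model as in the library's docstrings; all numeric settings are arbitrary example values:

```python
# End-to-end sketch of the public API touched in this file; model choice and
# parameter values are illustrative assumptions.
import numpy as np
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor

import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018

np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=500)
learner = RandomForestRegressor(n_estimators=100, max_depth=5)
dml_plr = dml.DoubleMLPLR(dml_data, ml_l=clone(learner), ml_m=clone(learner), n_folds=5)

dml_plr.fit()                                       # cross-fitted estimation
dml_plr.bootstrap(method="normal", n_rep_boot=500)  # multiplier bootstrap
print(dml_plr.confint(joint=True, level=0.95))      # joint confidence intervals
print(dml_plr.p_adjust(method="romano-wolf"))       # multiple-testing adjustment
dml_plr.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0)
print(dml_plr.sensitivity_summary)
```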
diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py
index fdee739dd..3ebf2f765 100644
--- a/doubleml/double_ml_data.py
+++ b/doubleml/double_ml_data.py
@@ -1,27 +1,24 @@
-import numpy as np
-import pandas as pd
import io
-
from abc import ABC, abstractmethod
-from sklearn.utils.validation import check_array, column_or_1d, check_consistent_length
+import numpy as np
+import pandas as pd
from sklearn.utils import assert_all_finite
from sklearn.utils.multiclass import type_of_target
-from .utils._estimation import _assure_2d_array
+from sklearn.utils.validation import check_array, check_consistent_length, column_or_1d
+
from .utils._checks import _check_set
+from .utils._estimation import _assure_2d_array
class DoubleMLBaseData(ABC):
- """Base Class Double machine learning data-backends
- """
- def __init__(self,
- data):
+ """Base Class Double machine learning data-backends"""
+
+ def __init__(self, data):
if not isinstance(data, pd.DataFrame):
- raise TypeError('data must be of pd.DataFrame type. '
- f'{str(data)} of type {str(type(data))} was passed.')
+ raise TypeError(f"data must be of pd.DataFrame type. {str(data)} of type {str(type(data))} was passed.")
if not data.columns.is_unique:
- raise ValueError('Invalid pd.DataFrame: '
- 'Contains duplicate column names.')
+ raise ValueError("Invalid pd.DataFrame: Contains duplicate column names.")
self._data = data
def __str__(self):
@@ -29,13 +26,17 @@ def __str__(self):
buf = io.StringIO()
self.data.info(verbose=False, buf=buf)
df_info = buf.getvalue()
- res = '================== DoubleMLBaseData Object ==================\n' + \
- '\n------------------ Data summary ------------------\n' + data_summary + \
- '\n------------------ DataFrame info ------------------\n' + df_info
+ res = (
+ "================== DoubleMLBaseData Object ==================\n"
+ + "\n------------------ Data summary ------------------\n"
+ + data_summary
+ + "\n------------------ DataFrame info ------------------\n"
+ + df_info
+ )
return res
def _data_summary_str(self):
- data_summary = f'No. Observations: {self.n_obs}\n'
+ data_summary = f"No. Observations: {self.n_obs}\n"
return data_summary
@property
@@ -63,7 +64,7 @@ def n_obs(self):
# multiple treatment variables case) and other things are also build around it, see for example DoubleML._params
@property
def d_cols(self):
- return ['theta']
+ return ["theta"]
@property
def n_treat(self):
@@ -137,16 +138,19 @@ class DoubleMLData(DoubleMLBaseData):
>>> (x, y, d) = make_plr_CCDDHNR2018(return_type='array')
>>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d)
"""
- def __init__(self,
- data,
- y_col,
- d_cols,
- x_cols=None,
- z_cols=None,
- t_col=None,
- s_col=None,
- use_other_treat_as_covariate=True,
- force_all_x_finite=True):
+
+ def __init__(
+ self,
+ data,
+ y_col,
+ d_cols,
+ x_cols=None,
+ z_cols=None,
+ t_col=None,
+ s_col=None,
+ use_other_treat_as_covariate=True,
+ force_all_x_finite=True,
+ ):
DoubleMLBaseData.__init__(self, data)
self.y_col = y_col
@@ -169,26 +173,31 @@ def __str__(self):
buf = io.StringIO()
self.data.info(verbose=False, buf=buf)
df_info = buf.getvalue()
- res = '================== DoubleMLData Object ==================\n' + \
- '\n------------------ Data summary ------------------\n' + data_summary + \
- '\n------------------ DataFrame info ------------------\n' + df_info
+ res = (
+ "================== DoubleMLData Object ==================\n"
+ + "\n------------------ Data summary ------------------\n"
+ + data_summary
+ + "\n------------------ DataFrame info ------------------\n"
+ + df_info
+ )
return res
def _data_summary_str(self):
- data_summary = f'Outcome variable: {self.y_col}\n' \
- f'Treatment variable(s): {self.d_cols}\n' \
- f'Covariates: {self.x_cols}\n' \
- f'Instrument variable(s): {self.z_cols}\n'
+ data_summary = (
+ f"Outcome variable: {self.y_col}\n"
+ f"Treatment variable(s): {self.d_cols}\n"
+ f"Covariates: {self.x_cols}\n"
+ f"Instrument variable(s): {self.z_cols}\n"
+ )
if self.t_col is not None:
- data_summary += f'Time variable: {self.t_col}\n'
+ data_summary += f"Time variable: {self.t_col}\n"
if self.s_col is not None:
- data_summary += f'Score/Selection variable: {self.s_col}\n'
- data_summary += f'No. Observations: {self.n_obs}\n'
+ data_summary += f"Score/Selection variable: {self.s_col}\n"
+ data_summary += f"No. Observations: {self.n_obs}\n"
return data_summary
@classmethod
- def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covariate=True,
- force_all_x_finite=True):
+ def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covariate=True, force_all_x_finite=True):
"""
Initialize :class:`DoubleMLData` from :class:`numpy.ndarray`'s.
@@ -236,22 +245,24 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covaria
>>> obj_dml_data_from_array = DoubleMLData.from_arrays(x, y, d)
"""
if isinstance(force_all_x_finite, str):
- if force_all_x_finite != 'allow-nan':
- raise ValueError("Invalid force_all_x_finite " + force_all_x_finite + ". " +
- "force_all_x_finite must be True, False or 'allow-nan'.")
+ if force_all_x_finite != "allow-nan":
+ raise ValueError(
+ "Invalid force_all_x_finite "
+ + force_all_x_finite
+ + ". "
+ + "force_all_x_finite must be True, False or 'allow-nan'."
+ )
elif not isinstance(force_all_x_finite, bool):
- raise TypeError("Invalid force_all_x_finite. " +
- "force_all_x_finite must be True, False or 'allow-nan'.")
+ raise TypeError("Invalid force_all_x_finite. " + "force_all_x_finite must be True, False or 'allow-nan'.")
- x = check_array(x, ensure_2d=False, allow_nd=False,
- force_all_finite=force_all_x_finite)
+ x = check_array(x, ensure_2d=False, allow_nd=False, force_all_finite=force_all_x_finite)
d = check_array(d, ensure_2d=False, allow_nd=False)
y = column_or_1d(y, warn=True)
x = _assure_2d_array(x)
d = _assure_2d_array(d)
- y_col = 'y'
+ y_col = "y"
if z is None:
check_consistent_length(x, y, d)
z_cols = None
@@ -260,34 +271,33 @@ def from_arrays(cls, x, y, d, z=None, t=None, s=None, use_other_treat_as_covaria
z = _assure_2d_array(z)
check_consistent_length(x, y, d, z)
if z.shape[1] == 1:
- z_cols = ['z']
+ z_cols = ["z"]
else:
- z_cols = [f'z{i + 1}' for i in np.arange(z.shape[1])]
+ z_cols = [f"z{i + 1}" for i in np.arange(z.shape[1])]
if t is None:
t_col = None
else:
t = column_or_1d(t, warn=True)
check_consistent_length(x, y, d, t)
- t_col = 't'
+ t_col = "t"
if s is None:
s_col = None
else:
s = column_or_1d(s, warn=True)
check_consistent_length(x, y, d, s)
- s_col = 's'
+ s_col = "s"
if d.shape[1] == 1:
- d_cols = ['d']
+ d_cols = ["d"]
else:
- d_cols = [f'd{i+1}' for i in np.arange(d.shape[1])]
+ d_cols = [f"d{i + 1}" for i in np.arange(d.shape[1])]
- x_cols = [f'X{i+1}' for i in np.arange(x.shape[1])]
+ x_cols = [f"X{i + 1}" for i in np.arange(x.shape[1])]
         # baseline version with features, outcome and treatments
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=x_cols + [y_col] + d_cols)
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=x_cols + [y_col] + d_cols)
if z is not None:
df_z = pd.DataFrame(z, columns=z_cols)
@@ -406,24 +416,24 @@ def x_cols(self):
@x_cols.setter
def x_cols(self, value):
- reset_value = hasattr(self, '_x_cols')
+ reset_value = hasattr(self, "_x_cols")
if value is not None:
if isinstance(value, str):
value = [value]
if not isinstance(value, list):
- raise TypeError('The covariates x_cols must be of str or list type (or None). '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The covariates x_cols must be of str or list type (or None). "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if not len(set(value)) == len(value):
- raise ValueError('Invalid covariates x_cols: '
- 'Contains duplicate values.')
+ raise ValueError("Invalid covariates x_cols: Contains duplicate values.")
if not set(value).issubset(set(self.all_variables)):
- raise ValueError('Invalid covariates x_cols. '
- 'At least one covariate is no data column.')
+ raise ValueError("Invalid covariates x_cols. At least one covariate is no data column.")
assert set(value).issubset(set(self.all_variables))
self._x_cols = value
else:
excluded_cols = set.union({self.y_col}, set(self.d_cols))
- if (self.z_cols is not None):
+ if self.z_cols is not None:
excluded_cols = set.union(excluded_cols, set(self.z_cols))
for col in [self.t_col, self.s_col]:
col = _check_set(col)
@@ -443,18 +453,18 @@ def d_cols(self):
@d_cols.setter
def d_cols(self, value):
- reset_value = hasattr(self, '_d_cols')
+ reset_value = hasattr(self, "_d_cols")
if isinstance(value, str):
value = [value]
if not isinstance(value, list):
- raise TypeError('The treatment variable(s) d_cols must be of str or list type. '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The treatment variable(s) d_cols must be of str or list type. "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if not len(set(value)) == len(value):
- raise ValueError('Invalid treatment variable(s) d_cols: '
- 'Contains duplicate values.')
+ raise ValueError("Invalid treatment variable(s) d_cols: Contains duplicate values.")
if not set(value).issubset(set(self.all_variables)):
- raise ValueError('Invalid treatment variable(s) d_cols. '
- 'At least one treatment variable is no data column.')
+ raise ValueError("Invalid treatment variable(s) d_cols. At least one treatment variable is no data column.")
self._d_cols = value
if reset_value:
self._check_disjoint_sets()
@@ -470,13 +480,13 @@ def y_col(self):
@y_col.setter
def y_col(self, value):
- reset_value = hasattr(self, '_y_col')
+ reset_value = hasattr(self, "_y_col")
if not isinstance(value, str):
- raise TypeError('The outcome variable y_col must be of str type. '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ f"The outcome variable y_col must be of str type. {str(value)} of type {str(type(value))} was passed."
+ )
if value not in self.all_variables:
- raise ValueError('Invalid outcome variable y_col. '
- f'{value} is no data column.')
+ raise ValueError(f"Invalid outcome variable y_col. {value} is no data column.")
self._y_col = value
if reset_value:
self._check_disjoint_sets()
@@ -491,19 +501,21 @@ def z_cols(self):
@z_cols.setter
def z_cols(self, value):
- reset_value = hasattr(self, '_z_cols')
+ reset_value = hasattr(self, "_z_cols")
if value is not None:
if isinstance(value, str):
value = [value]
if not isinstance(value, list):
- raise TypeError('The instrumental variable(s) z_cols must be of str or list type (or None). '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The instrumental variable(s) z_cols must be of str or list type (or None). "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if not len(set(value)) == len(value):
- raise ValueError('Invalid instrumental variable(s) z_cols: '
- 'Contains duplicate values.')
+ raise ValueError("Invalid instrumental variable(s) z_cols: Contains duplicate values.")
if not set(value).issubset(set(self.all_variables)):
- raise ValueError('Invalid instrumental variable(s) z_cols. '
- 'At least one instrumental variable is no data column.')
+ raise ValueError(
+ "Invalid instrumental variable(s) z_cols. At least one instrumental variable is no data column."
+ )
self._z_cols = value
else:
self._z_cols = None
@@ -520,14 +532,15 @@ def t_col(self):
@t_col.setter
def t_col(self, value):
- reset_value = hasattr(self, '_t_col')
+ reset_value = hasattr(self, "_t_col")
if value is not None:
if not isinstance(value, str):
- raise TypeError('The time variable t_col must be of str type (or None). '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The time variable t_col must be of str type (or None). "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if value not in self.all_variables:
- raise ValueError('Invalid time variable t_col. '
- f'{value} is no data column.')
+ raise ValueError(f"Invalid time variable t_col. {value} is no data column.")
self._t_col = value
else:
self._t_col = None
@@ -544,14 +557,15 @@ def s_col(self):
@s_col.setter
def s_col(self, value):
- reset_value = hasattr(self, '_s_col')
+ reset_value = hasattr(self, "_s_col")
if value is not None:
if not isinstance(value, str):
- raise TypeError('The score or selection variable s_col must be of str type (or None). '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The score or selection variable s_col must be of str type (or None). "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if value not in self.all_variables:
- raise ValueError('Invalid score or selection variable s_col. '
- f'{value} is no data column.')
+ raise ValueError(f"Invalid score or selection variable s_col. {value} is no data column.")
self._s_col = value
else:
self._s_col = None
@@ -568,10 +582,9 @@ def use_other_treat_as_covariate(self):
@use_other_treat_as_covariate.setter
def use_other_treat_as_covariate(self, value):
- reset_value = hasattr(self, '_use_other_treat_as_covariate')
+ reset_value = hasattr(self, "_use_other_treat_as_covariate")
if not isinstance(value, bool):
- raise TypeError('use_other_treat_as_covariate must be True or False. '
- f'Got {str(value)}.')
+ raise TypeError(f"use_other_treat_as_covariate must be True or False. Got {str(value)}.")
self._use_other_treat_as_covariate = value
if reset_value:
# by default, we initialize to the first treatment variable
@@ -586,14 +599,14 @@ def force_all_x_finite(self):
@force_all_x_finite.setter
def force_all_x_finite(self, value):
- reset_value = hasattr(self, '_force_all_x_finite')
+ reset_value = hasattr(self, "_force_all_x_finite")
if isinstance(value, str):
- if value != 'allow-nan':
- raise ValueError("Invalid force_all_x_finite " + value + ". " +
- "force_all_x_finite must be True, False or 'allow-nan'.")
+ if value != "allow-nan":
+ raise ValueError(
+ "Invalid force_all_x_finite " + value + ". " + "force_all_x_finite must be True, False or 'allow-nan'."
+ )
elif not isinstance(value, bool):
- raise TypeError("Invalid force_all_x_finite. " +
- "force_all_x_finite must be True, False or 'allow-nan'.")
+ raise TypeError("Invalid force_all_x_finite. " + "force_all_x_finite must be True, False or 'allow-nan'.")
self._force_all_x_finite = value
if reset_value:
# by default, we initialize to the first treatment variable
@@ -630,11 +643,11 @@ def set_x_d(self, treatment_var):
Active treatment variable that will be set to d.
"""
if not isinstance(treatment_var, str):
- raise TypeError('treatment_var must be of str type. '
- f'{str(treatment_var)} of type {str(type(treatment_var))} was passed.')
+ raise TypeError(
+ f"treatment_var must be of str type. {str(treatment_var)} of type {str(type(treatment_var))} was passed."
+ )
if treatment_var not in self.d_cols:
- raise ValueError('Invalid treatment_var. '
- f'{treatment_var} is not in d_cols.')
+ raise ValueError(f"Invalid treatment_var. {treatment_var} is not in d_cols.")
if self.use_other_treat_as_covariate:
# note that the following line needs to be adapted in case an intersection of x_cols and d_cols is allowed
# (see https://github.com/DoubleML/doubleml-for-py/issues/83)
@@ -644,8 +657,7 @@ def set_x_d(self, treatment_var):
xd_list = self.x_cols
assert_all_finite(self.data.loc[:, treatment_var])
if self.force_all_x_finite:
- assert_all_finite(self.data.loc[:, xd_list],
- allow_nan=self.force_all_x_finite == 'allow-nan')
+ assert_all_finite(self.data.loc[:, xd_list], allow_nan=self.force_all_x_finite == "allow-nan")
self._d = self.data.loc[:, treatment_var]
self._X = self.data.loc[:, xd_list]
@@ -653,16 +665,16 @@ def _check_binary_treats(self):
is_binary = pd.Series(dtype=bool, index=self.d_cols)
for treatment_var in self.d_cols:
this_d = self.data.loc[:, treatment_var]
- binary_treat = (type_of_target(this_d) == 'binary')
+ binary_treat = type_of_target(this_d) == "binary"
zero_one_treat = np.all((np.power(this_d, 2) - this_d) == 0)
- is_binary[treatment_var] = (binary_treat & zero_one_treat)
+ is_binary[treatment_var] = binary_treat & zero_one_treat
return is_binary
def _check_binary_outcome(self):
y = self.data.loc[:, self.y_col]
- binary_outcome = (type_of_target(y) == 'binary')
+ binary_outcome = type_of_target(y) == "binary"
zero_one_outcome = np.all((np.power(y, 2) - y) == 0)
- is_binary = (binary_outcome & zero_one_outcome)
+ is_binary = binary_outcome & zero_one_outcome
return is_binary
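Both checks above combine `type_of_target` with an arithmetic test because sklearn labels any two-valued array "binary"; the identity v**2 - v == 0 additionally pins down 0/1 coding. In isolation (plain NumPy, nothing DoubleML-specific):

```python
# v**2 - v == 0 holds elementwise iff every entry is 0 or 1.
import numpy as np

d01 = np.array([0, 1, 1, 0])
d_signed = np.array([-1, 1, 1, -1])  # also 'binary' for type_of_target, but not 0/1
print(np.all((np.power(d01, 2) - d01) == 0))            # True
print(np.all((np.power(d_signed, 2) - d_signed) == 0))  # False: (-1)**2 - (-1) == 2
```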
def _check_disjoint_sets(self):
@@ -675,28 +687,34 @@ def _check_disjoint_sets_y_d_x_z_t_s(self):
d_cols_set = set(self.d_cols)
if not y_col_set.isdisjoint(x_cols_set):
- raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and covariate in '
- '``x_cols``.')
+ raise ValueError(f"{str(self.y_col)} cannot be set as outcome variable ``y_col`` and covariate in ``x_cols``.")
if not y_col_set.isdisjoint(d_cols_set):
- raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and treatment variable in '
- '``d_cols``.')
+ raise ValueError(
+ f"{str(self.y_col)} cannot be set as outcome variable ``y_col`` and treatment variable in ``d_cols``."
+ )
# note that the line xd_list = self.x_cols + self.d_cols in method set_x_d needs adaptation if an intersection of
# x_cols and d_cols is allowed (see https://github.com/DoubleML/doubleml-for-py/issues/83)
if not d_cols_set.isdisjoint(x_cols_set):
- raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and as covariate'
- '(``x_cols``). Consider using parameter ``use_other_treat_as_covariate``.')
+ raise ValueError(
+ "At least one variable/column is set as treatment variable (``d_cols``) and as covariate"
+ "(``x_cols``). Consider using parameter ``use_other_treat_as_covariate``."
+ )
if self.z_cols is not None:
z_cols_set = set(self.z_cols)
if not y_col_set.isdisjoint(z_cols_set):
- raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and instrumental '
- 'variable in ``z_cols``.')
+ raise ValueError(
+ f"{str(self.y_col)} cannot be set as outcome variable ``y_col`` and instrumental variable in ``z_cols``."
+ )
if not d_cols_set.isdisjoint(z_cols_set):
- raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and '
- 'instrumental variable in ``z_cols``.')
+ raise ValueError(
+ "At least one variable/column is set as treatment variable (``d_cols``) and "
+ "instrumental variable in ``z_cols``."
+ )
if not x_cols_set.isdisjoint(z_cols_set):
- raise ValueError('At least one variable/column is set as covariate (``x_cols``) and instrumental '
- 'variable in ``z_cols``.')
+ raise ValueError(
+ "At least one variable/column is set as covariate (``x_cols``) and instrumental variable in ``z_cols``."
+ )
self._check_disjoint_sets_t_s()
@@ -708,41 +726,49 @@ def _check_disjoint_sets_t_s(self):
if self.t_col is not None:
t_col_set = {self.t_col}
if not t_col_set.isdisjoint(x_cols_set):
- raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and covariate in '
- '``x_cols``.')
+ raise ValueError(f"{str(self.t_col)} cannot be set as time variable ``t_col`` and covariate in ``x_cols``.")
if not t_col_set.isdisjoint(d_cols_set):
- raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and treatment variable in '
- '``d_cols``.')
+ raise ValueError(
+ f"{str(self.t_col)} cannot be set as time variable ``t_col`` and treatment variable in ``d_cols``."
+ )
if not t_col_set.isdisjoint(y_col_set):
- raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and outcome variable '
- '``y_col``.')
+ raise ValueError(f"{str(self.t_col)} cannot be set as time variable ``t_col`` and outcome variable ``y_col``.")
if self.z_cols is not None:
z_cols_set = set(self.z_cols)
if not t_col_set.isdisjoint(z_cols_set):
- raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and instrumental '
- 'variable in ``z_cols``.')
+ raise ValueError(
+ f"{str(self.t_col)} cannot be set as time variable ``t_col`` and instrumental variable in ``z_cols``."
+ )
if self.s_col is not None:
s_col_set = {self.s_col}
if not s_col_set.isdisjoint(x_cols_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and covariate in '
- '``x_cols``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and covariate in ``x_cols``."
+ )
if not s_col_set.isdisjoint(d_cols_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and treatment '
- 'variable in ``d_cols``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and treatment "
+ "variable in ``d_cols``."
+ )
if not s_col_set.isdisjoint(y_col_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and outcome '
- 'variable ``y_col``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and outcome variable ``y_col``."
+ )
if self.z_cols is not None:
z_cols_set = set(self.z_cols)
if not s_col_set.isdisjoint(z_cols_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and '
- 'instrumental variable in ``z_cols``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and "
+ "instrumental variable in ``z_cols``."
+ )
if self.t_col is not None:
t_col_set = {self.t_col}
if not s_col_set.isdisjoint(t_col_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and time '
- 'variable ``t_col``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and time "
+ "variable ``t_col``."
+ )
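The net effect of the disjointness checks above: every column may play at most one role. A minimal failing example (made-up frame; assumes the constructor runs these checks at init time, as the call sites above suggest):

```python
import pandas as pd
from doubleml import DoubleMLData

df = pd.DataFrame({"y": [0.1, 0.2, 0.3], "d": [0, 1, 0], "X1": [1.0, 2.0, 3.0]})
try:
    # 'd' requested both as treatment and as covariate -> rejected
    DoubleMLData(df, y_col="y", d_cols="d", x_cols=["X1", "d"])
except ValueError as err:
    print(err)
```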
class DoubleMLClusterData(DoubleMLData):
@@ -807,32 +833,28 @@ class DoubleMLClusterData(DoubleMLData):
>>> (x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(return_type='array')
>>> obj_dml_data_from_array = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars, z)
"""
- def __init__(self,
- data,
- y_col,
- d_cols,
- cluster_cols,
- x_cols=None,
- z_cols=None,
- t_col=None,
- s_col=None,
- use_other_treat_as_covariate=True,
- force_all_x_finite=True):
+
+ def __init__(
+ self,
+ data,
+ y_col,
+ d_cols,
+ cluster_cols,
+ x_cols=None,
+ z_cols=None,
+ t_col=None,
+ s_col=None,
+ use_other_treat_as_covariate=True,
+ force_all_x_finite=True,
+ ):
DoubleMLBaseData.__init__(self, data)
# we need to set cluster_cols (needs _data) before the call to the super __init__ because of the x_cols setter
self.cluster_cols = cluster_cols
self._set_cluster_vars()
- DoubleMLData.__init__(self,
- data,
- y_col,
- d_cols,
- x_cols,
- z_cols,
- t_col,
- s_col,
- use_other_treat_as_covariate,
- force_all_x_finite)
+ DoubleMLData.__init__(
+ self, data, y_col, d_cols, x_cols, z_cols, t_col, s_col, use_other_treat_as_covariate, force_all_x_finite
+ )
self._check_disjoint_sets_cluster_cols()
def __str__(self):
@@ -840,28 +862,35 @@ def __str__(self):
buf = io.StringIO()
self.data.info(verbose=False, buf=buf)
df_info = buf.getvalue()
- res = '================== DoubleMLClusterData Object ==================\n' + \
- '\n------------------ Data summary ------------------\n' + data_summary + \
- '\n------------------ DataFrame info ------------------\n' + df_info
+ res = (
+ "================== DoubleMLClusterData Object ==================\n"
+ + "\n------------------ Data summary ------------------\n"
+ + data_summary
+ + "\n------------------ DataFrame info ------------------\n"
+ + df_info
+ )
return res
def _data_summary_str(self):
- data_summary = f'Outcome variable: {self.y_col}\n' \
- f'Treatment variable(s): {self.d_cols}\n' \
- f'Cluster variable(s): {self.cluster_cols}\n' \
- f'Covariates: {self.x_cols}\n' \
- f'Instrument variable(s): {self.z_cols}\n'
+ data_summary = (
+ f"Outcome variable: {self.y_col}\n"
+ f"Treatment variable(s): {self.d_cols}\n"
+ f"Cluster variable(s): {self.cluster_cols}\n"
+ f"Covariates: {self.x_cols}\n"
+ f"Instrument variable(s): {self.z_cols}\n"
+ )
if self.t_col is not None:
- data_summary += f'Time variable: {self.t_col}\n'
+ data_summary += f"Time variable: {self.t_col}\n"
if self.s_col is not None:
- data_summary += f'Score/Selection variable: {self.s_col}\n'
+ data_summary += f"Score/Selection variable: {self.s_col}\n"
- data_summary += f'No. Observations: {self.n_obs}\n'
+ data_summary += f"No. Observations: {self.n_obs}\n"
return data_summary
@classmethod
- def from_arrays(cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True,
- force_all_x_finite=True):
+ def from_arrays(
+ cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_treat_as_covariate=True, force_all_x_finite=True
+ ):
"""
Initialize :class:`DoubleMLClusterData` from :class:`numpy.ndarray`'s.
@@ -915,15 +944,24 @@ def from_arrays(cls, x, y, d, cluster_vars, z=None, t=None, s=None, use_other_tr
cluster_vars = check_array(cluster_vars, ensure_2d=False, allow_nd=False)
cluster_vars = _assure_2d_array(cluster_vars)
if cluster_vars.shape[1] == 1:
- cluster_cols = ['cluster_var']
+ cluster_cols = ["cluster_var"]
else:
- cluster_cols = [f'cluster_var{i + 1}' for i in np.arange(cluster_vars.shape[1])]
+ cluster_cols = [f"cluster_var{i + 1}" for i in np.arange(cluster_vars.shape[1])]
data = pd.concat((pd.DataFrame(cluster_vars, columns=cluster_cols), dml_data.data), axis=1)
- return (cls(data, dml_data.y_col, dml_data.d_cols, cluster_cols,
- dml_data.x_cols, dml_data.z_cols, dml_data.t_col, dml_data.s_col,
- dml_data.use_other_treat_as_covariate, dml_data.force_all_x_finite))
+ return cls(
+ data,
+ dml_data.y_col,
+ dml_data.d_cols,
+ cluster_cols,
+ dml_data.x_cols,
+ dml_data.z_cols,
+ dml_data.t_col,
+ dml_data.s_col,
+ dml_data.use_other_treat_as_covariate,
+ dml_data.force_all_x_finite,
+ )
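Cluster columns follow the same deterministic naming as the other defaults; a sketch with made-up data:

```python
import numpy as np
from doubleml import DoubleMLClusterData

rng = np.random.default_rng(0)
x = rng.normal(size=(50, 2))
y = rng.normal(size=50)
d = rng.binomial(1, 0.5, size=50)
one_cluster = rng.integers(0, 5, size=50)
two_clusters = rng.integers(0, 5, size=(50, 2))

print(DoubleMLClusterData.from_arrays(x, y, d, one_cluster).cluster_cols)
# ['cluster_var']
print(DoubleMLClusterData.from_arrays(x, y, d, two_clusters).cluster_cols)
# ['cluster_var1', 'cluster_var2']
```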
@property
def cluster_cols(self):
@@ -934,18 +972,18 @@ def cluster_cols(self):
@cluster_cols.setter
def cluster_cols(self, value):
- reset_value = hasattr(self, '_cluster_cols')
+ reset_value = hasattr(self, "_cluster_cols")
if isinstance(value, str):
value = [value]
if not isinstance(value, list):
- raise TypeError('The cluster variable(s) cluster_cols must be of str or list type. '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(
+ "The cluster variable(s) cluster_cols must be of str or list type. "
+ f"{str(value)} of type {str(type(value))} was passed."
+ )
if not len(set(value)) == len(value):
- raise ValueError('Invalid cluster variable(s) cluster_cols: '
- 'Contains duplicate values.')
+ raise ValueError("Invalid cluster variable(s) cluster_cols: Contains duplicate values.")
if not set(value).issubset(set(self.all_variables)):
- raise ValueError('Invalid cluster variable(s) cluster_cols. '
- 'At least one cluster variable is no data column.')
+ raise ValueError("Invalid cluster variable(s) cluster_cols. At least one cluster variable is no data column.")
self._cluster_cols = value
if reset_value:
self._check_disjoint_sets()
@@ -986,8 +1024,9 @@ def x_cols(self, value):
x_cols = [col for col in self.data.columns if col not in y_d]
else:
if (self.z_cols is not None) & (self.t_col is not None):
- y_d_z_t_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, {self.s_col},
- set(self.cluster_cols))
+ y_d_z_t_s = set.union(
+ {self.y_col}, set(self.d_cols), set(self.z_cols), {self.t_col}, {self.s_col}, set(self.cluster_cols)
+ )
x_cols = [col for col in self.data.columns if col not in y_d_z_t_s]
elif self.z_cols is not None:
y_d_z_s = set.union({self.y_col}, set(self.d_cols), set(self.z_cols), {self.s_col}, set(self.cluster_cols))
@@ -1019,28 +1058,37 @@ def _check_disjoint_sets_cluster_cols(self):
s_col_set = {self.s_col}
if not y_col_set.isdisjoint(cluster_cols_set):
- raise ValueError(f'{str(self.y_col)} cannot be set as outcome variable ``y_col`` and cluster '
- 'variable in ``cluster_cols``.')
+ raise ValueError(
+ f"{str(self.y_col)} cannot be set as outcome variable ``y_col`` and cluster variable in ``cluster_cols``."
+ )
if not d_cols_set.isdisjoint(cluster_cols_set):
- raise ValueError('At least one variable/column is set as treatment variable (``d_cols``) and '
- 'cluster variable in ``cluster_cols``.')
+ raise ValueError(
+ "At least one variable/column is set as treatment variable (``d_cols``) and "
+ "cluster variable in ``cluster_cols``."
+ )
# TODO: Is the following combination allowed, or not?
if not x_cols_set.isdisjoint(cluster_cols_set):
- raise ValueError('At least one variable/column is set as covariate (``x_cols``) and cluster '
- 'variable in ``cluster_cols``.')
+ raise ValueError(
+ "At least one variable/column is set as covariate (``x_cols``) and cluster variable in ``cluster_cols``."
+ )
if self.z_cols is not None:
z_cols_set = set(self.z_cols)
if not z_cols_set.isdisjoint(cluster_cols_set):
- raise ValueError('At least one variable/column is set as instrumental variable (``z_cols``) and '
- 'cluster variable in ``cluster_cols``.')
+ raise ValueError(
+ "At least one variable/column is set as instrumental variable (``z_cols``) and "
+ "cluster variable in ``cluster_cols``."
+ )
if self.t_col is not None:
if not t_col_set.isdisjoint(cluster_cols_set):
- raise ValueError(f'{str(self.t_col)} cannot be set as time variable ``t_col`` and '
- 'cluster variable in ``cluster_cols``.')
+ raise ValueError(
+ f"{str(self.t_col)} cannot be set as time variable ``t_col`` and cluster variable in ``cluster_cols``."
+ )
if self.s_col is not None:
if not s_col_set.isdisjoint(cluster_cols_set):
- raise ValueError(f'{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and '
- 'cluster variable in ``cluster_cols``.')
+ raise ValueError(
+ f"{str(self.s_col)} cannot be set as score or selection variable ``s_col`` and "
+ "cluster variable in ``cluster_cols``."
+ )
def _set_cluster_vars(self):
assert_all_finite(self.data.loc[:, self.cluster_cols])
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index b70a16230..2718629d2 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -1,19 +1,26 @@
-import numpy as np
-import pandas as pd
import copy
-from scipy.stats import norm
+import numpy as np
+import pandas as pd
from scipy.optimize import minimize_scalar
+from scipy.stats import norm
from statsmodels.stats.multitest import multipletests
-from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est
-from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \
- _check_float, _check_integer, _check_bool, _check_benchmarks
+from .utils._checks import (
+ _check_benchmarks,
+ _check_bool,
+ _check_bootstrap,
+ _check_float,
+ _check_framework_compatibility,
+ _check_in_zero_one,
+ _check_integer,
+)
from .utils._descriptive import generate_summary
+from .utils._estimation import _aggregate_coefs_and_ses, _draw_weights, _var_est
from .utils._plots import _sensitivity_contour_plot
-class DoubleMLFramework():
+class DoubleMLFramework:
"""Double Machine Learning Framework to combine DoubleML classes and compute confidendence intervals.
Parameters
@@ -26,29 +33,29 @@ class DoubleMLFramework():
"""
def __init__(
- self,
- doubleml_dict=None,
+ self,
+ doubleml_dict=None,
):
self._is_cluster_data = False
# check input
if not isinstance(doubleml_dict, dict):
- raise TypeError('doubleml_dict must be a dictionary.')
- expected_keys = ['thetas', 'ses', 'all_thetas', 'all_ses', 'var_scaling_factors', 'scaled_psi']
+ raise TypeError("doubleml_dict must be a dictionary.")
+ expected_keys = ["thetas", "ses", "all_thetas", "all_ses", "var_scaling_factors", "scaled_psi"]
if not all(key in doubleml_dict.keys() for key in expected_keys):
- raise ValueError('The dict must contain the following keys: ' + ', '.join(expected_keys))
+ raise ValueError("The dict must contain the following keys: " + ", ".join(expected_keys))
# set scores and parameters
- self._n_thetas = doubleml_dict['scaled_psi'].shape[1]
- self._n_rep = doubleml_dict['scaled_psi'].shape[2]
- self._n_obs = doubleml_dict['scaled_psi'].shape[0]
+ self._n_thetas = doubleml_dict["scaled_psi"].shape[1]
+ self._n_rep = doubleml_dict["scaled_psi"].shape[2]
+ self._n_obs = doubleml_dict["scaled_psi"].shape[0]
- self._thetas = doubleml_dict['thetas']
- self._ses = doubleml_dict['ses']
- self._all_thetas = doubleml_dict['all_thetas']
- self._all_ses = doubleml_dict['all_ses']
- self._var_scaling_factors = doubleml_dict['var_scaling_factors']
- self._scaled_psi = doubleml_dict['scaled_psi']
+ self._thetas = doubleml_dict["thetas"]
+ self._ses = doubleml_dict["ses"]
+ self._all_thetas = doubleml_dict["all_thetas"]
+ self._all_ses = doubleml_dict["all_ses"]
+ self._var_scaling_factors = doubleml_dict["var_scaling_factors"]
+ self._scaled_psi = doubleml_dict["scaled_psi"]
# initialize cluster data
self._check_and_set_cluster_data(doubleml_dict)
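The expected keys and the shape checks in `_check_framework_shapes` further down are sufficient to build a framework object by hand. A hedged sketch with synthetic score values (import path assumed; shapes follow the checks below):

```python
import numpy as np
from doubleml import DoubleMLFramework  # import path assumed

n_obs, n_thetas, n_rep = 100, 1, 1
rng = np.random.default_rng(7)
doubleml_dict = {
    "thetas": np.array([0.5]),                        # (n_thetas,)
    "ses": np.array([0.1]),                           # (n_thetas,)
    "all_thetas": np.array([[0.5]]),                  # (n_thetas, n_rep)
    "all_ses": np.array([[0.1]]),                     # (n_thetas, n_rep)
    "var_scaling_factors": np.array([float(n_obs)]),  # (n_thetas,)
    "scaled_psi": rng.normal(size=(n_obs, n_thetas, n_rep)),
}
framework = DoubleMLFramework(doubleml_dict)
print(framework.thetas, framework.ses)
```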
@@ -60,9 +67,9 @@ def __init__(
self._check_framework_shapes()
self._treatment_names = None
- if 'treatment_names' in doubleml_dict.keys():
- self._check_treatment_names(doubleml_dict['treatment_names'])
- self._treatment_names = doubleml_dict['treatment_names']
+ if "treatment_names" in doubleml_dict.keys():
+ self._check_treatment_names(doubleml_dict["treatment_names"])
+ self._treatment_names = doubleml_dict["treatment_names"]
# initialize bootstrap distribution
self._boot_t_stat = None
@@ -221,8 +228,7 @@ def summary(self):
A summary for the estimated causal parameters ``thetas``.
"""
ci = self.confint()
- df_summary = generate_summary(self.thetas, self.ses, self.t_stats,
- self.pvals, ci, self._treatment_names)
+ df_summary = generate_summary(self.thetas, self.ses, self.t_stats, self.pvals, ci, self._treatment_names)
return df_summary
@property
@@ -235,47 +241,58 @@ def sensitivity_summary(self):
res : str
Summary for the sensitivity analysis.
"""
- header = '================== Sensitivity Analysis ==================\n'
+ header = "================== Sensitivity Analysis ==================\n"
if self.sensitivity_params is None:
- res = header + 'Apply sensitivity_analysis() to generate sensitivity_summary.'
+ res = header + "Apply sensitivity_analysis() to generate sensitivity_summary."
else:
- sig_level = f'Significance Level: level={self.sensitivity_params["input"]["level"]}\n'
- scenario_params = f'Sensitivity parameters: cf_y={self.sensitivity_params["input"]["cf_y"]}; ' \
- f'cf_d={self.sensitivity_params["input"]["cf_d"]}, ' \
- f'rho={self.sensitivity_params["input"]["rho"]}'
-
- theta_and_ci_col_names = ['CI lower', 'theta lower', ' theta', 'theta upper', 'CI upper']
- theta_and_ci = np.transpose(np.vstack((self.sensitivity_params['ci']['lower'],
- self.sensitivity_params['theta']['lower'],
- self.thetas,
- self.sensitivity_params['theta']['upper'],
- self.sensitivity_params['ci']['upper'])))
- df_theta_and_ci = pd.DataFrame(theta_and_ci,
- columns=theta_and_ci_col_names,
- index=self.treatment_names)
+ sig_level = f"Significance Level: level={self.sensitivity_params['input']['level']}\n"
+ scenario_params = (
+ f"Sensitivity parameters: cf_y={self.sensitivity_params['input']['cf_y']}; "
+ f"cf_d={self.sensitivity_params['input']['cf_d']}, "
+ f"rho={self.sensitivity_params['input']['rho']}"
+ )
+
+ theta_and_ci_col_names = ["CI lower", "theta lower", " theta", "theta upper", "CI upper"]
+ theta_and_ci = np.transpose(
+ np.vstack(
+ (
+ self.sensitivity_params["ci"]["lower"],
+ self.sensitivity_params["theta"]["lower"],
+ self.thetas,
+ self.sensitivity_params["theta"]["upper"],
+ self.sensitivity_params["ci"]["upper"],
+ )
+ )
+ )
+ df_theta_and_ci = pd.DataFrame(theta_and_ci, columns=theta_and_ci_col_names, index=self.treatment_names)
theta_and_ci_summary = str(df_theta_and_ci)
- rvs_col_names = ['H_0', 'RV (%)', 'RVa (%)']
- rvs = np.transpose(np.vstack((self.sensitivity_params['rv'],
- self.sensitivity_params['rva']))) * 100
+ rvs_col_names = ["H_0", "RV (%)", "RVa (%)"]
+ rvs = np.transpose(np.vstack((self.sensitivity_params["rv"], self.sensitivity_params["rva"]))) * 100
- df_rvs = pd.DataFrame(np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)),
- columns=rvs_col_names,
- index=self.treatment_names)
+ df_rvs = pd.DataFrame(
+ np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)),
+ columns=rvs_col_names,
+ index=self.treatment_names,
+ )
rvs_summary = str(df_rvs)
- res = header + \
- '\n------------------ Scenario ------------------\n' + \
- sig_level + scenario_params + '\n' + \
- '\n------------------ Bounds with CI ------------------\n' + \
- theta_and_ci_summary + '\n' + \
- '\n------------------ Robustness Values ------------------\n' + \
- rvs_summary
+ res = (
+ header
+ + "\n------------------ Scenario ------------------\n"
+ + sig_level
+ + scenario_params
+ + "\n"
+ + "\n------------------ Bounds with CI ------------------\n"
+ + theta_and_ci_summary
+ + "\n"
+ + "\n------------------ Robustness Values ------------------\n"
+ + rvs_summary
+ )
return res
def __add__(self, other):
-
if isinstance(other, DoubleMLFramework):
# internal consistency check
self._check_framework_shapes()
@@ -290,39 +307,41 @@ def __add__(self, other):
var_scaling_factors = self._var_scaling_factors
# compute standard errors
- sigma2_hat = np.divide(
- np.mean(np.square(scaled_psi), axis=0),
- var_scaling_factors.reshape(-1, 1))
+ sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
doubleml_dict = {
- 'thetas': thetas,
- 'ses': ses,
- 'all_thetas': all_thetas,
- 'all_ses': all_ses,
- 'var_scaling_factors': var_scaling_factors,
- 'scaled_psi': scaled_psi,
- 'is_cluster_data': self._is_cluster_data,
- 'cluster_dict': self._cluster_dict,
+ "thetas": thetas,
+ "ses": ses,
+ "all_thetas": all_thetas,
+ "all_ses": all_ses,
+ "var_scaling_factors": var_scaling_factors,
+ "scaled_psi": scaled_psi,
+ "is_cluster_data": self._is_cluster_data,
+ "cluster_dict": self._cluster_dict,
}
# sensitivity combination only available for same outcome and cond. expectation (e.g. IRM)
if self._sensitivity_implemented and other._sensitivity_implemented:
- nu2_score_element = self._sensitivity_elements['psi_nu2'] + other._sensitivity_elements['psi_nu2'] - \
- np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'],
- self._sensitivity_elements['riesz_rep']))
+ nu2_score_element = (
+ self._sensitivity_elements["psi_nu2"]
+ + other._sensitivity_elements["psi_nu2"]
+ - np.multiply(
+ 2.0, np.multiply(self._sensitivity_elements["riesz_rep"], self._sensitivity_elements["riesz_rep"])
+ )
+ )
nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
psi_nu2 = nu2_score_element - nu2
sensitivity_elements = {
- 'sigma2': self._sensitivity_elements['sigma2'],
- 'nu2': nu2,
- 'psi_sigma2': self._sensitivity_elements['psi_sigma2'],
- 'psi_nu2': psi_nu2,
- 'riesz_rep': self._sensitivity_elements['riesz_rep'] + other._sensitivity_elements['riesz_rep'],
+ "sigma2": self._sensitivity_elements["sigma2"],
+ "nu2": nu2,
+ "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
+ "psi_nu2": psi_nu2,
+ "riesz_rep": self._sensitivity_elements["riesz_rep"] + other._sensitivity_elements["riesz_rep"],
}
- doubleml_dict['sensitivity_elements'] = sensitivity_elements
+ doubleml_dict["sensitivity_elements"] = sensitivity_elements
new_obj = DoubleMLFramework(doubleml_dict)
else:
@@ -334,7 +353,6 @@ def __radd__(self, other):
return self.__add__(other)
def __sub__(self, other):
-
if isinstance(other, DoubleMLFramework):
# internal consistency check
self._check_framework_shapes()
@@ -349,39 +367,41 @@ def __sub__(self, other):
var_scaling_factors = self._var_scaling_factors
# compute standard errors
- sigma2_hat = np.divide(
- np.mean(np.square(scaled_psi), axis=0),
- var_scaling_factors.reshape(-1, 1))
+ sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
doubleml_dict = {
- 'thetas': thetas,
- 'ses': ses,
- 'all_thetas': all_thetas,
- 'all_ses': all_ses,
- 'var_scaling_factors': var_scaling_factors,
- 'scaled_psi': scaled_psi,
- 'is_cluster_data': self._is_cluster_data,
- 'cluster_dict': self._cluster_dict,
+ "thetas": thetas,
+ "ses": ses,
+ "all_thetas": all_thetas,
+ "all_ses": all_ses,
+ "var_scaling_factors": var_scaling_factors,
+ "scaled_psi": scaled_psi,
+ "is_cluster_data": self._is_cluster_data,
+ "cluster_dict": self._cluster_dict,
}
# sensitivity combination only available for same outcome and cond. expectation (e.g. IRM)
if self._sensitivity_implemented and other._sensitivity_implemented:
- nu2_score_element = self._sensitivity_elements['psi_nu2'] - other._sensitivity_elements['psi_nu2'] + \
- np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'],
- self._sensitivity_elements['riesz_rep']))
+ nu2_score_element = (
+ self._sensitivity_elements["psi_nu2"]
+ - other._sensitivity_elements["psi_nu2"]
+ + np.multiply(
+ 2.0, np.multiply(self._sensitivity_elements["riesz_rep"], self._sensitivity_elements["riesz_rep"])
+ )
+ )
nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
psi_nu2 = nu2_score_element - nu2
sensitivity_elements = {
- 'sigma2': self._sensitivity_elements['sigma2'],
- 'nu2': nu2,
- 'psi_sigma2': self._sensitivity_elements['psi_sigma2'],
- 'psi_nu2': psi_nu2,
- 'riesz_rep': self._sensitivity_elements['riesz_rep'] - other._sensitivity_elements['riesz_rep'],
+ "sigma2": self._sensitivity_elements["sigma2"],
+ "nu2": nu2,
+ "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
+ "psi_nu2": psi_nu2,
+ "riesz_rep": self._sensitivity_elements["riesz_rep"] - other._sensitivity_elements["riesz_rep"],
}
- doubleml_dict['sensitivity_elements'] = sensitivity_elements
+ doubleml_dict["sensitivity_elements"] = sensitivity_elements
new_obj = DoubleMLFramework(doubleml_dict)
else:
@@ -404,30 +424,30 @@ def __mul__(self, other):
scaled_psi = np.multiply(other, self._scaled_psi)
doubleml_dict = {
- 'thetas': thetas,
- 'ses': ses,
- 'all_thetas': all_thetas,
- 'all_ses': all_ses,
- 'var_scaling_factors': var_scaling_factors,
- 'scaled_psi': scaled_psi,
- 'is_cluster_data': self._is_cluster_data,
- 'cluster_dict': self._cluster_dict,
+ "thetas": thetas,
+ "ses": ses,
+ "all_thetas": all_thetas,
+ "all_ses": all_ses,
+ "var_scaling_factors": var_scaling_factors,
+ "scaled_psi": scaled_psi,
+ "is_cluster_data": self._is_cluster_data,
+ "cluster_dict": self._cluster_dict,
}
# sensitivity combination only available for linear models
if self._sensitivity_implemented:
- nu2_score_element = np.multiply(np.square(other), self._sensitivity_elements['psi_nu2'])
+ nu2_score_element = np.multiply(np.square(other), self._sensitivity_elements["psi_nu2"])
nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
psi_nu2 = nu2_score_element - nu2
sensitivity_elements = {
- 'sigma2': self._sensitivity_elements['sigma2'],
- 'nu2': nu2,
- 'psi_sigma2': self._sensitivity_elements['psi_sigma2'],
- 'psi_nu2': psi_nu2,
- 'riesz_rep': np.multiply(other, self._sensitivity_elements['riesz_rep']),
+ "sigma2": self._sensitivity_elements["sigma2"],
+ "nu2": nu2,
+ "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
+ "psi_nu2": psi_nu2,
+ "riesz_rep": np.multiply(other, self._sensitivity_elements["riesz_rep"]),
}
- doubleml_dict['sensitivity_elements'] = sensitivity_elements
+ doubleml_dict["sensitivity_elements"] = sensitivity_elements
new_obj = DoubleMLFramework(doubleml_dict)
else:
@@ -440,31 +460,32 @@ def __rmul__(self, other):
def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
if not self._sensitivity_implemented:
- raise NotImplementedError('Sensitivity analysis is not implemented for this model.')
+ raise NotImplementedError("Sensitivity analysis is not implemented for this model.")
# input checks
- _check_in_zero_one(cf_y, 'cf_y', include_one=False)
- _check_in_zero_one(cf_d, 'cf_d', include_one=False)
+ _check_in_zero_one(cf_y, "cf_y", include_one=False)
+ _check_in_zero_one(cf_d, "cf_d", include_one=False)
if not isinstance(rho, float):
- raise TypeError(f'rho must be of float type. '
- f'{str(rho)} of type {str(type(rho))} was passed.')
- _check_in_zero_one(abs(rho), 'The absolute value of rho')
- _check_in_zero_one(level, 'The confidence level', include_zero=False, include_one=False)
+ raise TypeError(f"rho must be of float type. {str(rho)} of type {str(type(rho))} was passed.")
+ _check_in_zero_one(abs(rho), "The absolute value of rho")
+ _check_in_zero_one(level, "The confidence level", include_zero=False, include_one=False)
# set elements for readability
- sigma2 = self.sensitivity_elements['sigma2']
- nu2 = self.sensitivity_elements['nu2']
- psi_sigma = self.sensitivity_elements['psi_sigma2']
- psi_nu = self.sensitivity_elements['psi_nu2']
+ sigma2 = self.sensitivity_elements["sigma2"]
+ nu2 = self.sensitivity_elements["nu2"]
+ psi_sigma = self.sensitivity_elements["psi_sigma2"]
+ psi_nu = self.sensitivity_elements["psi_nu2"]
psi_scaled = self._scaled_psi
if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)):
- raise ValueError('sensitivity_elements sigma2 and nu2 have to be positive. '
- f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. "
- 'Most likely this is due to low quality learners (especially propensity scores).')
+ raise ValueError(
+ "sensitivity_elements sigma2 and nu2 have to be positive. "
+ f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. "
+ "Most likely this is due to low quality learners (especially propensity scores)."
+ )
# elementwise operations
- confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0-cf_d))))
+ confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0 - cf_d))))
sensitivity_scaling = np.sqrt(np.multiply(sigma2, nu2))
# sigma2 and nu2 are of shape (1, n_thetas, n_rep), whereas all_thetas is of shape (n_thetas, n_rep)
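For the default scenario of `sensitivity_analysis` below (cf_y = cf_d = 0.03, rho = 1.0), the elementwise bound above works out to:

```python
import numpy as np

rho, cf_y, cf_d = 1.0, 0.03, 0.03
confounding_strength = np.abs(rho) * np.sqrt(cf_y * (cf_d / (1.0 - cf_d)))
print(round(float(confounding_strength), 4))  # 0.0305
```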
@@ -482,32 +503,35 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
for i_rep in range(self.n_rep):
for i_theta in range(self.n_thetas):
-
if not self._is_cluster_data:
smpls = None
cluster_vars = None
smpls_cluster = None
n_folds_per_cluster = None
else:
- smpls = self._cluster_dict['smpls'][i_rep]
- cluster_vars = self._cluster_dict['cluster_vars']
- smpls_cluster = self._cluster_dict['smpls_cluster'][i_rep]
- n_folds_per_cluster = self._cluster_dict['n_folds_per_cluster']
-
- sigma2_lower_hat, _ = _var_est(psi=psi_lower[:, i_theta, i_rep],
- psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]),
- smpls=smpls,
- is_cluster_data=self._is_cluster_data,
- cluster_vars=cluster_vars,
- smpls_cluster=smpls_cluster,
- n_folds_per_cluster=n_folds_per_cluster)
- sigma2_upper_hat, _ = _var_est(psi=psi_upper[:, i_theta, i_rep],
- psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]),
- smpls=smpls,
- is_cluster_data=self._is_cluster_data,
- cluster_vars=cluster_vars,
- smpls_cluster=smpls_cluster,
- n_folds_per_cluster=n_folds_per_cluster)
+ smpls = self._cluster_dict["smpls"][i_rep]
+ cluster_vars = self._cluster_dict["cluster_vars"]
+ smpls_cluster = self._cluster_dict["smpls_cluster"][i_rep]
+ n_folds_per_cluster = self._cluster_dict["n_folds_per_cluster"]
+
+ sigma2_lower_hat, _ = _var_est(
+ psi=psi_lower[:, i_theta, i_rep],
+ psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]),
+ smpls=smpls,
+ is_cluster_data=self._is_cluster_data,
+ cluster_vars=cluster_vars,
+ smpls_cluster=smpls_cluster,
+ n_folds_per_cluster=n_folds_per_cluster,
+ )
+ sigma2_upper_hat, _ = _var_est(
+ psi=psi_upper[:, i_theta, i_rep],
+ psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]),
+ smpls=smpls,
+ is_cluster_data=self._is_cluster_data,
+ cluster_vars=cluster_vars,
+ smpls_cluster=smpls_cluster,
+ n_folds_per_cluster=n_folds_per_cluster,
+ )
all_sigma_lower[i_theta, i_rep] = np.sqrt(sigma2_lower_hat)
all_sigma_upper[i_theta, i_rep] = np.sqrt(sigma2_upper_hat)
@@ -524,38 +548,33 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
ci_lower = np.median(all_ci_lower, axis=1)
ci_upper = np.median(all_ci_upper, axis=1)
- theta_dict = {'lower': theta_lower,
- 'upper': theta_upper}
+ theta_dict = {"lower": theta_lower, "upper": theta_upper}
- se_dict = {'lower': sigma_lower,
- 'upper': sigma_upper}
+ se_dict = {"lower": sigma_lower, "upper": sigma_upper}
- ci_dict = {'lower': ci_lower,
- 'upper': ci_upper}
+ ci_dict = {"lower": ci_lower, "upper": ci_upper}
- res_dict = {'theta': theta_dict,
- 'se': se_dict,
- 'ci': ci_dict}
+ res_dict = {"theta": theta_dict, "se": se_dict, "ci": ci_dict}
return res_dict
def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment):
_check_float(null_hypothesis, "null_hypothesis")
- _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1)
+ _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas - 1)
# check which side is relevant
- bound = 'upper' if (null_hypothesis > self.thetas[idx_treatment]) else 'lower'
+ bound = "upper" if (null_hypothesis > self.thetas[idx_treatment]) else "lower"
# minimize the square to find boundary solutions
def rv_fct(value, param):
- res = self._calc_sensitivity_analysis(cf_y=value,
- cf_d=value,
- rho=rho,
- level=level)[param][bound][idx_treatment] - null_hypothesis
+ res = (
+ self._calc_sensitivity_analysis(cf_y=value, cf_d=value, rho=rho, level=level)[param][bound][idx_treatment]
+ - null_hypothesis
+ )
return np.square(res)
- rv = minimize_scalar(rv_fct, bounds=(0, 0.9999), method='bounded', args=('theta', )).x
- rva = minimize_scalar(rv_fct, bounds=(0, 0.9999), method='bounded', args=('ci', )).x
+ rv = minimize_scalar(rv_fct, bounds=(0, 0.9999), method="bounded", args=("theta",)).x
+ rva = minimize_scalar(rv_fct, bounds=(0, 0.9999), method="bounded", args=("ci",)).x
return rv, rva
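The squared residual turns "find the confounding strength where the bound crosses the null" into a bounded scalar minimization. The same pattern in isolation, with a stand-in objective rather than the DoubleML bound:

```python
from scipy.optimize import minimize_scalar

def rv_fct(value):
    bound = 0.5 - value        # stand-in bound, decreasing in the confounding strength
    return (bound - 0.3) ** 2  # squared distance to a stand-in null hypothesis of 0.3

rv = minimize_scalar(rv_fct, bounds=(0, 0.9999), method="bounded").x
print(round(rv, 4))            # ~0.2: where the stand-in bound hits the null
```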
@@ -602,12 +621,16 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
if null_hypothesis.shape == (self._n_thetas,):
null_hypothesis_vec = null_hypothesis
else:
- raise ValueError("null_hypothesis is numpy.ndarray but does not have the required "
- f"shape ({self._n_thetas},). "
- f'Array of shape {str(null_hypothesis.shape)} was passed.')
+ raise ValueError(
+ "null_hypothesis is numpy.ndarray but does not have the required "
+ f"shape ({self._n_thetas},). "
+ f"Array of shape {str(null_hypothesis.shape)} was passed."
+ )
else:
- raise TypeError("null_hypothesis has to be of type float or np.ndarry. "
- f"{str(null_hypothesis)} of type {str(type(null_hypothesis))} was passed.")
+ raise TypeError(
+ "null_hypothesis has to be of type float or np.ndarry. "
+ f"{str(null_hypothesis)} of type {str(type(null_hypothesis))} was passed."
+ )
# compute sensitivity analysis
sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level)
@@ -618,22 +641,15 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
for i_theta in range(self._n_thetas):
rv[i_theta], rva[i_theta] = self._calc_robustness_value(
- null_hypothesis=null_hypothesis_vec[i_theta],
- level=level,
- rho=rho,
- idx_treatment=i_theta
+ null_hypothesis=null_hypothesis_vec[i_theta], level=level, rho=rho, idx_treatment=i_theta
)
- sensitivity_dict['rv'] = rv
- sensitivity_dict['rva'] = rva
+ sensitivity_dict["rv"] = rv
+ sensitivity_dict["rva"] = rva
# add all input parameters
- input_params = {'cf_y': cf_y,
- 'cf_d': cf_d,
- 'rho': rho,
- 'level': level,
- 'null_hypothesis': null_hypothesis_vec}
- sensitivity_dict['input'] = input_params
+ input_params = {"cf_y": cf_y, "cf_d": cf_d, "rho": rho, "level": level, "null_hypothesis": null_hypothesis_vec}
+ sensitivity_dict["input"] = input_params
self._sensitivity_params = sensitivity_dict
return self
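On the hand-built `framework` from the earlier sketch (which carries no sensitivity elements), the input validation above can be exercised directly; a full analysis would additionally require the `sensitivity_elements` checked in `_calc_sensitivity_analysis`:

```python
import numpy as np

try:
    framework.sensitivity_analysis(null_hypothesis=np.zeros(3))  # shape (3,) vs required (1,)
except ValueError as err:
    print(err)  # '... does not have the required shape (1,). Array of shape (3,) was passed.'
```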
@@ -659,45 +675,38 @@ def confint(self, joint=False, level=0.95):
"""
if not isinstance(joint, bool):
- raise TypeError('joint must be True or False. '
- f'Got {str(joint)}.')
+ raise TypeError(f"joint must be True or False. Got {str(joint)}.")
if not isinstance(level, float):
- raise TypeError('The confidence level must be of float type. '
- f'{str(level)} of type {str(type(level))} was passed.')
+ raise TypeError(f"The confidence level must be of float type. {str(level)} of type {str(type(level))} was passed.")
if (level <= 0) | (level >= 1):
- raise ValueError('The confidence level must be in (0,1). '
- f'{str(level)} was passed.')
+ raise ValueError(f"The confidence level must be in (0,1). {str(level)} was passed.")
# compute critical values
alpha = 1 - level
- percentages = np.array([alpha / 2, 1. - alpha / 2])
+ percentages = np.array([alpha / 2, 1.0 - alpha / 2])
if joint:
if self._boot_t_stat is None:
- raise ValueError('Apply bootstrap() before confint(joint=True).')
+ raise ValueError("Apply bootstrap() before confint(joint=True).")
max_abs_t_value_distribution = np.amax(np.abs(self._boot_t_stat), axis=1)
- critical_values = np.quantile(
- a=max_abs_t_value_distribution,
- q=level,
- axis=0)
+ critical_values = np.quantile(a=max_abs_t_value_distribution, q=level, axis=0)
else:
critical_values = np.repeat(norm.ppf(percentages[1]), self._n_rep)
# compute all cis over repetitions (shape: n_thetas x 2 x n_rep)
self._all_cis = np.stack(
- (self.all_thetas - self.all_ses * critical_values,
- self.all_thetas + self.all_ses * critical_values),
- axis=1)
+ (self.all_thetas - self.all_ses * critical_values, self.all_thetas + self.all_ses * critical_values), axis=1
+ )
ci = np.median(self._all_cis, axis=2)
- df_ci = pd.DataFrame(ci, columns=['{:.1f} %'.format(i * 100) for i in percentages])
+ df_ci = pd.DataFrame(ci, columns=["{:.1f} %".format(i * 100) for i in percentages])
if self._treatment_names is not None:
df_ci.set_index(pd.Index(self._treatment_names), inplace=True)
return df_ci
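Continuing with the hand-built `framework`: joint intervals need a prior multiplier bootstrap, exactly as the check above enforces (a usage sketch; the bootstrap body itself is elided in this diff):

```python
framework.bootstrap(method="normal", n_rep_boot=500)  # required before joint=True
df_ci = framework.confint(joint=True, level=0.95)
print(df_ci)  # columns '2.5 %' and '97.5 %'
```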
- def bootstrap(self, method='normal', n_rep_boot=500):
+ def bootstrap(self, method="normal", n_rep_boot=500):
"""
Multiplier bootstrap for DoubleMLFrameworks.
@@ -717,7 +726,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
_check_bootstrap(method, n_rep_boot)
if self._is_cluster_data:
- raise NotImplementedError('bootstrap not yet implemented with clustering.')
+ raise NotImplementedError("bootstrap not yet implemented with clustering.")
self._n_rep_boot = n_rep_boot
self._boot_method = method
@@ -731,7 +740,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
return self
- def p_adjust(self, method='romano-wolf'):
+ def p_adjust(self, method="romano-wolf"):
"""
Multiple testing adjustment for DoubleML Frameworks.
@@ -751,15 +760,14 @@ def p_adjust(self, method='romano-wolf'):
A numpy array with all corrected p-values for each repetition.
"""
if not isinstance(method, str):
- raise TypeError('The p_adjust method must be of str type. '
- f'{str(method)} of type {str(type(method))} was passed.')
+ raise TypeError(f"The p_adjust method must be of str type. {str(method)} of type {str(type(method))} was passed.")
all_p_vals_corrected = np.full_like(self.all_pvals, np.nan)
for i_rep in range(self.n_rep):
p_vals_tmp = self.all_pvals[:, i_rep]
- if method.lower() in ['rw', 'romano-wolf']:
+ if method.lower() in ["rw", "romano-wolf"]:
if self._boot_t_stat is None:
raise ValueError(f'Apply bootstrap() before p_adjust("{method}").')
@@ -775,9 +783,7 @@ def p_adjust(self, method='romano-wolf'):
ro = np.argsort(stepdown_ind)
for i_theta in range(self.n_thetas):
- bootstrap_citical_value = np.max(
- abs(np.delete(bootstrap_t_stats, stepdown_ind[:i_theta], axis=1)),
- axis=1)
+ bootstrap_critical_value = np.max(abs(np.delete(bootstrap_t_stats, stepdown_ind[:i_theta], axis=1)), axis=1)
p_init[i_theta] = np.minimum(1, np.mean(bootstrap_critical_value >= abs_t_stats_tmp[stepdown_ind][i_theta]))
for i_theta in range(self.n_thetas):
@@ -785,8 +791,8 @@ def p_adjust(self, method='romano-wolf'):
p_vals_corrected_tmp_sorted[i_theta] = p_init[i_theta]
else:
p_vals_corrected_tmp_sorted[i_theta] = np.maximum(
- p_init[i_theta],
- p_vals_corrected_tmp_sorted[i_theta - 1])
+ p_init[i_theta], p_vals_corrected_tmp_sorted[i_theta - 1]
+ )
# reorder p-values
p_vals_corrected_tmp = p_vals_corrected_tmp_sorted[ro]
@@ -796,14 +802,23 @@ def p_adjust(self, method='romano-wolf'):
all_p_vals_corrected[:, i_rep] = p_vals_corrected_tmp
p_vals_corrected = np.median(all_p_vals_corrected, axis=1)
- df_p_vals = pd.DataFrame(
- np.vstack((self.thetas, p_vals_corrected)).T,
- columns=['thetas', 'pval'])
+ df_p_vals = pd.DataFrame(np.vstack((self.thetas, p_vals_corrected)).T, columns=["thetas", "pval"])
return df_p_vals, all_p_vals_corrected
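And the Romano-Wolf step-down on the same object; as the check above requires, the bootstrap distribution must be in place first:

```python
framework.bootstrap(method="normal", n_rep_boot=500)
df_p_vals, all_p_vals_corrected = framework.p_adjust(method="romano-wolf")
print(df_p_vals)  # DataFrame with columns 'thetas' and 'pval'
```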
- def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0,
- include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100):
+ def sensitivity_plot(
+ self,
+ idx_treatment=0,
+ value="theta",
+ rho=1.0,
+ level=0.95,
+ null_hypothesis=0.0,
+ include_scenario=True,
+ benchmarks=None,
+ fill=True,
+ grid_bounds=(0.15, 0.15),
+ grid_size=100,
+ ):
"""
Contour plot of the sensitivity with respect to latent/confounding variables.
@@ -856,28 +871,26 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
fig : object
Plotly figure of the sensitivity contours.
"""
- _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self.n_thetas-1)
+ _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self.n_thetas - 1)
if not isinstance(value, str):
- raise TypeError('value must be a string. '
- f'{str(value)} of type {type(value)} was passed.')
- valid_values = ['theta', 'ci']
+ raise TypeError(f"value must be a string. {str(value)} of type {type(value)} was passed.")
+ valid_values = ["theta", "ci"]
if value not in valid_values:
- raise ValueError('Invalid value ' + value + '. ' +
- 'Valid values ' + ' or '.join(valid_values) + '.')
+ raise ValueError("Invalid value " + value + ". " + "Valid values " + " or ".join(valid_values) + ".")
_check_float(null_hypothesis, "null_hypothesis")
- _check_bool(include_scenario, 'include_scenario')
+ _check_bool(include_scenario, "include_scenario")
if include_scenario and self.sensitivity_params is None:
- raise ValueError('Apply sensitivity_analysis() to include senario in sensitivity_plot. ')
+ raise ValueError("Apply sensitivity_analysis() to include senario in sensitivity_plot. ")
_check_benchmarks(benchmarks)
- _check_bool(fill, 'fill')
+ _check_bool(fill, "fill")
_check_in_zero_one(grid_bounds[0], "grid_bounds", include_zero=False, include_one=False)
_check_in_zero_one(grid_bounds[1], "grid_bounds", include_zero=False, include_one=False)
_check_integer(grid_size, "grid_size", lower_bound=10)
- null_hypothesis = self.sensitivity_params['input']['null_hypothesis'][idx_treatment]
+ null_hypothesis = self.sensitivity_params["input"]["null_hypothesis"][idx_treatment]
unadjusted_theta = self.thetas[idx_treatment]
# check which side is relevant
- bound = 'upper' if (null_hypothesis > unadjusted_theta) else 'lower'
+ bound = "upper" if (null_hypothesis > unadjusted_theta) else "lower"
# create evaluation grid
cf_d_vec = np.linspace(0, grid_bounds[0], grid_size)
@@ -887,7 +900,6 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
contour_values = np.full(shape=(grid_size, grid_size), fill_value=np.nan)
for i_cf_d_grid, cf_d_grid in enumerate(cf_d_vec):
for i_cf_y_grid, cf_y_grid in enumerate(cf_y_vec):
-
sens_dict = self._calc_sensitivity_analysis(
cf_y=cf_y_grid,
cf_d=cf_d_grid,
@@ -897,12 +909,12 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
contour_values[i_cf_d_grid, i_cf_y_grid] = sens_dict[value][bound][idx_treatment]
# get the correct unadjusted value for confidence bands
- if value == 'theta':
+ if value == "theta":
unadjusted_value = unadjusted_theta
else:
- assert value == 'ci'
- ci = self.confint(level=self.sensitivity_params['input']['level'])
- if bound == 'upper':
+ assert value == "ci"
+ ci = self.confint(level=self.sensitivity_params["input"]["level"])
+ if bound == "upper":
unadjusted_value = ci.iloc[idx_treatment, 1]
else:
unadjusted_value = ci.iloc[idx_treatment, 0]
@@ -910,77 +922,85 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
# compute the values for the benchmarks
benchmark_dict = copy.deepcopy(benchmarks)
if benchmarks is not None:
- n_benchmarks = len(benchmarks['name'])
+ n_benchmarks = len(benchmarks["name"])
benchmark_values = np.full(shape=(n_benchmarks,), fill_value=np.nan)
- for benchmark_idx in range(len(benchmarks['name'])):
+ for benchmark_idx in range(len(benchmarks["name"])):
sens_dict_bench = self._calc_sensitivity_analysis(
- cf_y=benchmarks['cf_y'][benchmark_idx],
- cf_d=benchmarks['cf_d'][benchmark_idx],
- rho=self.sensitivity_params['input']['rho'],
- level=self.sensitivity_params['input']['level']
+ cf_y=benchmarks["cf_y"][benchmark_idx],
+ cf_d=benchmarks["cf_d"][benchmark_idx],
+ rho=self.sensitivity_params["input"]["rho"],
+ level=self.sensitivity_params["input"]["level"],
)
benchmark_values[benchmark_idx] = sens_dict_bench[value][bound][idx_treatment]
- benchmark_dict['value'] = benchmark_values
- fig = _sensitivity_contour_plot(x=cf_d_vec,
- y=cf_y_vec,
- contour_values=contour_values,
- unadjusted_value=unadjusted_value,
- scenario_x=self.sensitivity_params['input']['cf_d'],
- scenario_y=self.sensitivity_params['input']['cf_y'],
- scenario_value=self.sensitivity_params[value][bound][idx_treatment],
- include_scenario=include_scenario,
- benchmarks=benchmark_dict,
- fill=fill)
+ benchmark_dict["value"] = benchmark_values
+ fig = _sensitivity_contour_plot(
+ x=cf_d_vec,
+ y=cf_y_vec,
+ contour_values=contour_values,
+ unadjusted_value=unadjusted_value,
+ scenario_x=self.sensitivity_params["input"]["cf_d"],
+ scenario_y=self.sensitivity_params["input"]["cf_y"],
+ scenario_value=self.sensitivity_params[value][bound][idx_treatment],
+ include_scenario=include_scenario,
+ benchmarks=benchmark_dict,
+ fill=fill,
+ )
return fig
def _check_and_set_cluster_data(self, doubleml_dict):
self._cluster_dict = None
if "is_cluster_data" in doubleml_dict.keys():
- _check_bool(doubleml_dict['is_cluster_data'], 'is_cluster_data')
- self._is_cluster_data = doubleml_dict['is_cluster_data']
+ _check_bool(doubleml_dict["is_cluster_data"], "is_cluster_data")
+ self._is_cluster_data = doubleml_dict["is_cluster_data"]
if self._is_cluster_data:
- if not ("cluster_dict" in doubleml_dict.keys()):
- raise ValueError('If is_cluster_data is True, cluster_dict must be provided.')
-
- if not isinstance(doubleml_dict['cluster_dict'], dict):
- raise TypeError('cluster_dict must be a dictionary.')
-
- expected_keys_cluster = ['smpls', 'smpls_cluster', 'cluster_vars', 'n_folds_per_cluster']
- if not all(key in doubleml_dict['cluster_dict'].keys() for key in expected_keys_cluster):
- raise ValueError('The cluster_dict must contain the following keys: ' + ', '.join(expected_keys_cluster)
- + '. Got: ' + ', '.join(doubleml_dict['cluster_dict'].keys()) + '.')
+ if "cluster_dict" not in doubleml_dict.keys():
+ raise ValueError("If is_cluster_data is True, cluster_dict must be provided.")
+
+ if not isinstance(doubleml_dict["cluster_dict"], dict):
+ raise TypeError("cluster_dict must be a dictionary.")
+
+ expected_keys_cluster = ["smpls", "smpls_cluster", "cluster_vars", "n_folds_per_cluster"]
+ if not all(key in doubleml_dict["cluster_dict"].keys() for key in expected_keys_cluster):
+ raise ValueError(
+ "The cluster_dict must contain the following keys: "
+ + ", ".join(expected_keys_cluster)
+ + ". Got: "
+ + ", ".join(doubleml_dict["cluster_dict"].keys())
+ + "."
+ )
- self._cluster_dict = doubleml_dict['cluster_dict']
+ self._cluster_dict = doubleml_dict["cluster_dict"]
return
def _check_and_set_sensitivity_elements(self, doubleml_dict):
- if not ("sensitivity_elements" in doubleml_dict.keys()):
+ if "sensitivity_elements" not in doubleml_dict.keys():
sensitivity_implemented = False
sensitivity_elements = None
else:
- if not isinstance(doubleml_dict['sensitivity_elements'], dict):
- raise TypeError('sensitivity_elements must be a dictionary.')
- expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep']
- if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity):
- raise ValueError('The sensitivity_elements dict must contain the following '
- 'keys: ' + ', '.join(expected_keys_sensitivity))
+ if not isinstance(doubleml_dict["sensitivity_elements"], dict):
+ raise TypeError("sensitivity_elements must be a dictionary.")
+ expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]
+ if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
+ raise ValueError(
+ "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
+ )
for key in expected_keys_sensitivity:
- if not isinstance(doubleml_dict['sensitivity_elements'][key], np.ndarray):
- raise TypeError(f'The sensitivity element {key} must be a numpy array.')
+ if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
+ raise TypeError(f"The sensitivity element {key} must be a numpy array.")
# set sensitivity elements
sensitivity_implemented = True
sensitivity_elements = {
- 'sigma2': doubleml_dict['sensitivity_elements']['sigma2'],
- 'nu2': doubleml_dict['sensitivity_elements']['nu2'],
- 'psi_sigma2': doubleml_dict['sensitivity_elements']['psi_sigma2'],
- 'psi_nu2': doubleml_dict['sensitivity_elements']['psi_nu2'],
- 'riesz_rep': doubleml_dict['sensitivity_elements']['riesz_rep'],
+ "sigma2": doubleml_dict["sensitivity_elements"]["sigma2"],
+ "nu2": doubleml_dict["sensitivity_elements"]["nu2"],
+ "psi_sigma2": doubleml_dict["sensitivity_elements"]["psi_sigma2"],
+ "psi_nu2": doubleml_dict["sensitivity_elements"]["psi_nu2"],
+ "riesz_rep": doubleml_dict["sensitivity_elements"]["riesz_rep"],
}
self._sensitivity_implemented = sensitivity_implemented
@@ -993,49 +1013,70 @@ def _check_framework_shapes(self):
score_dim = (self._n_obs, self._n_thetas, self.n_rep)
# check if all sizes match
if self._thetas.shape != (self._n_thetas,):
- raise ValueError(f'The shape of thetas does not match the expected shape ({self._n_thetas},).')
+ raise ValueError(f"The shape of thetas does not match the expected shape ({self._n_thetas},).")
if self._ses.shape != (self._n_thetas,):
- raise ValueError(f'The shape of ses does not match the expected shape ({self._n_thetas},).')
+ raise ValueError(f"The shape of ses does not match the expected shape ({self._n_thetas},).")
if self._all_thetas.shape != (self._n_thetas, self._n_rep):
- raise ValueError(f'The shape of all_thetas does not match the expected shape ({self._n_thetas}, {self._n_rep}).')
+ raise ValueError(f"The shape of all_thetas does not match the expected shape ({self._n_thetas}, {self._n_rep}).")
if self._all_ses.shape != (self._n_thetas, self._n_rep):
- raise ValueError(f'The shape of all_ses does not match the expected shape ({self._n_thetas}, {self._n_rep}).')
+ raise ValueError(f"The shape of all_ses does not match the expected shape ({self._n_thetas}, {self._n_rep}).")
if self._var_scaling_factors.shape != (self._n_thetas,):
- raise ValueError(f'The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).')
+ raise ValueError(f"The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).")
# dimension of scaled_psi is n_obs x n_thetas x n_rep (by default)
if self._scaled_psi.shape != score_dim:
- raise ValueError(('The shape of scaled_psi does not match the expected '
- f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).'))
+ raise ValueError(
+ (
+ "The shape of scaled_psi does not match the expected "
+ f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
+ )
+ )
if self._sensitivity_implemented:
- if self._sensitivity_elements['sigma2'].shape != (1, self._n_thetas, self.n_rep):
- raise ValueError('The shape of sigma2 does not match the expected shape '
- f'(1, {self._n_thetas}, {self._n_rep}).')
- if self._sensitivity_elements['nu2'].shape != (1, self._n_thetas, self.n_rep):
- raise ValueError(f'The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).')
- if self._sensitivity_elements['psi_sigma2'].shape != score_dim:
- raise ValueError(('The shape of psi_sigma2 does not match the expected '
- f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).'))
- if self._sensitivity_elements['psi_nu2'].shape != score_dim:
- raise ValueError(('The shape of psi_nu2 does not match the expected '
- f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).'))
- if self._sensitivity_elements['riesz_rep'].shape != score_dim:
- raise ValueError(('The shape of riesz_rep does not match the expected '
- f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).'))
+ if self._sensitivity_elements["sigma2"].shape != (1, self._n_thetas, self.n_rep):
+ raise ValueError(
+ f"The shape of sigma2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep})."
+ )
+ if self._sensitivity_elements["nu2"].shape != (1, self._n_thetas, self.n_rep):
+ raise ValueError(f"The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).")
+ if self._sensitivity_elements["psi_sigma2"].shape != score_dim:
+ raise ValueError(
+ (
+ "The shape of psi_sigma2 does not match the expected "
+ f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
+ )
+ )
+ if self._sensitivity_elements["psi_nu2"].shape != score_dim:
+ raise ValueError(
+ (
+ "The shape of psi_nu2 does not match the expected "
+ f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
+ )
+ )
+ if self._sensitivity_elements["riesz_rep"].shape != score_dim:
+ raise ValueError(
+ (
+ "The shape of riesz_rep does not match the expected "
+ f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
+ )
+ )
return None
def _check_treatment_names(self, treatment_names):
if not isinstance(treatment_names, list):
- raise TypeError('treatment_names must be a list. '
- f'Got {str(treatment_names)} of type {str(type(treatment_names))}.')
+ raise TypeError(
+ f"treatment_names must be a list. Got {str(treatment_names)} of type {str(type(treatment_names))}."
+ )
is_str = [isinstance(name, str) for name in treatment_names]
if not all(is_str):
- raise TypeError('treatment_names must be a list of strings. '
- f'At least one element is not a string: {str(treatment_names)}.')
+ raise TypeError(
+ f"treatment_names must be a list of strings. At least one element is not a string: {str(treatment_names)}."
+ )
if len(treatment_names) != self._n_thetas:
- raise ValueError('The length of treatment_names does not match the number of treatments. '
- f'Got {self._n_thetas} treatments and {len(treatment_names)} treatment names.')
+ raise ValueError(
+ "The length of treatment_names does not match the number of treatments. "
+ f"Got {self._n_thetas} treatments and {len(treatment_names)} treatment names."
+ )
return None
@@ -1044,10 +1085,10 @@ def concat(objs):
Concatenate DoubleMLFramework objects.
"""
if len(objs) == 0:
- raise TypeError('Need at least one object to concatenate.')
+ raise TypeError("Need at least one object to concatenate.")
if not all(isinstance(obj, DoubleMLFramework) for obj in objs):
- raise TypeError('All objects must be of type DoubleMLFramework.')
+ raise TypeError("All objects must be of type DoubleMLFramework.")
# check on internal consistency of objects
_ = [obj._check_framework_shapes() for obj in objs]
@@ -1063,27 +1104,27 @@ def concat(objs):
ses = np.concatenate([obj.ses for obj in objs], axis=0)
if any(obj._is_cluster_data for obj in objs):
- raise NotImplementedError('concat not yet implemented with clustering.')
+ raise NotImplementedError("concat not yet implemented with clustering.")
else:
is_cluster_data = False
doubleml_dict = {
- 'thetas': thetas,
- 'ses': ses,
- 'all_thetas': all_thetas,
- 'all_ses': all_ses,
- 'var_scaling_factors': var_scaling_factors,
- 'scaled_psi': scaled_psi,
- 'is_cluster_data': is_cluster_data,
+ "thetas": thetas,
+ "ses": ses,
+ "all_thetas": all_thetas,
+ "all_ses": all_ses,
+ "var_scaling_factors": var_scaling_factors,
+ "scaled_psi": scaled_psi,
+ "is_cluster_data": is_cluster_data,
}
if all(obj._sensitivity_implemented for obj in objs):
sensitivity_elements = {}
- for key in ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep']:
+ for key in ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]:
assert all(key in obj._sensitivity_elements.keys() for obj in objs)
sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
- doubleml_dict['sensitivity_elements'] = sensitivity_elements
+ doubleml_dict["sensitivity_elements"] = sensitivity_elements
new_obj = DoubleMLFramework(doubleml_dict)
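# Illustrative sketch (assumes the keys assembled by concat() above suffice to
# construct a DoubleMLFramework; all array values are placeholders with the
# shapes enforced by _check_framework_shapes).
import numpy as np
from doubleml.double_ml_framework import DoubleMLFramework, concat

n_obs, n_thetas, n_rep = 100, 1, 1
rng = np.random.default_rng(42)

def toy_framework():
    doubleml_dict = {
        "thetas": np.zeros(n_thetas),
        "ses": np.ones(n_thetas),
        "all_thetas": np.zeros((n_thetas, n_rep)),
        "all_ses": np.ones((n_thetas, n_rep)),
        "var_scaling_factors": np.full(n_thetas, n_obs),
        "scaled_psi": rng.normal(size=(n_obs, n_thetas, n_rep)),
        "is_cluster_data": False,
    }
    return DoubleMLFramework(doubleml_dict)

# concat stacks the frameworks along the parameter dimension
combined = concat([toy_framework(), toy_framework()])
print(combined.thetas)  # two entries, one per input framework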
diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py
index b6a003221..57dd6e623 100644
--- a/doubleml/double_ml_score_mixins.py
+++ b/doubleml/double_ml_score_mixins.py
@@ -1,13 +1,11 @@
import copy
-
-import numpy as np
-
import warnings
+from abc import abstractmethod
+import numpy as np
from scipy.optimize import fmin_l_bfgs_b, root_scalar
-from .utils._estimation import _get_bracket_guess
-from abc import abstractmethod
+from .utils._estimation import _get_bracket_guess
class LinearScoreMixin:
@@ -27,36 +25,37 @@ class LinearScoreMixin:
`score functions <https://docs.doubleml.org/stable/guide/scores.html>`_ and on
`variance estimation <https://docs.doubleml.org/stable/guide/se_confint.html>`_ in the DoubleML user guide.
"""
- _score_type = 'linear'
+
+ _score_type = "linear"
@property
def _score_element_names(self):
- return ['psi_a', 'psi_b']
+ return ["psi_a", "psi_b"]
def _compute_score(self, psi_elements, coef):
- psi = psi_elements['psi_a'] * coef + psi_elements['psi_b']
+ psi = psi_elements["psi_a"] * coef + psi_elements["psi_b"]
return psi
def _compute_score_deriv(self, psi_elements, coef):
- return psi_elements['psi_a']
+ return psi_elements["psi_a"]
def _est_coef(self, psi_elements, smpls=None, scaling_factor=None, inds=None):
- psi_a = psi_elements['psi_a']
- psi_b = psi_elements['psi_b']
+ psi_a = psi_elements["psi_a"]
+ psi_b = psi_elements["psi_b"]
if inds is not None:
psi_a = psi_a[inds]
psi_b = psi_b[inds]
if not self._is_cluster_data:
- coef = - np.mean(psi_b) / np.mean(psi_a)
+ coef = -np.mean(psi_b) / np.mean(psi_a)
# for clustered data we need the smpls and the scaling factors
else:
assert smpls is not None
assert scaling_factor is not None
assert inds is None
# if we have clustered data and dml2, the solution is the root of a weighted sum
- psi_a_subsample_mean = 0.
- psi_b_subsample_mean = 0.
+ psi_a_subsample_mean = 0.0
+ psi_b_subsample_mean = 0.0
for i_fold, (_, test_index) in enumerate(smpls):
psi_a_subsample_mean += scaling_factor[i_fold] * np.sum(psi_a[test_index])
psi_b_subsample_mean += scaling_factor[i_fold] * np.sum(psi_b[test_index])
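# Worked numeric sketch of the non-clustered branch of _est_coef above: with a
# linear score psi = psi_a * theta + psi_b, solving mean(psi) = 0 gives the
# closed form theta_hat = -mean(psi_b) / mean(psi_a).
import numpy as np

rng = np.random.default_rng(0)
psi_a = -np.ones(1000)                  # constant derivative component
psi_b = 2.0 + rng.normal(size=1000)     # noisy component centered at 2
theta_hat = -np.mean(psi_b) / np.mean(psi_a)
print(theta_hat)  # approximately 2.0, the root of the averaged score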
@@ -83,7 +82,8 @@ class NonLinearScoreMixin:
``_compute_score_deriv``, which should implement the evaluation of the derivative of the score function
:math:`\\frac{\\partial}{\\partial \\theta} \\psi(W; \\theta, \\eta)`, need to be added model-specifically.
"""
- _score_type = 'nonlinear'
+
+ _score_type = "nonlinear"
_coef_start_val = np.nan
_coef_bounds = None
@@ -121,7 +121,7 @@ def _aggregate_obs(psi):
# if we have clustered data, the solution is the root of a weighted sum
else:
- psi_mean = 0.
+ psi_mean = 0.0
for i_fold, (_, test_index) in enumerate(smpls):
psi_mean += scaling_factor[i_fold] * np.sum(psi[test_index])
@@ -145,71 +145,68 @@ def score_deriv(theta):
bounded = (self._coef_bounds[0] > -np.inf) & (self._coef_bounds[1] < np.inf)
if not bounded:
- root_res = root_scalar(score,
- x0=self._coef_start_val,
- fprime=score_deriv,
- method='newton')
+ root_res = root_scalar(score, x0=self._coef_start_val, fprime=score_deriv, method="newton")
theta_hat = root_res.root
if not root_res.converged:
score_val = score(theta_hat)
- warnings.warn('Could not find a root of the score function.\n '
- f'Flag: {root_res.flag}.\n'
- f'Score value found is {score_val} '
- f'for parameter theta equal to {theta_hat}.')
+ warnings.warn(
+ "Could not find a root of the score function.\n "
+ f"Flag: {root_res.flag}.\n"
+ f"Score value found is {score_val} "
+ f"for parameter theta equal to {theta_hat}."
+ )
else:
signs_different, bracket_guess = _get_bracket_guess(score, self._coef_start_val, self._coef_bounds)
if signs_different:
- root_res = root_scalar(score,
- bracket=bracket_guess,
- method='brentq')
+ root_res = root_scalar(score, bracket=bracket_guess, method="brentq")
theta_hat = root_res.root
else:
# try to find an alternative start value
def score_squared(theta):
res = np.power(np.mean(self._compute_score(psi_elements, theta)), 2)
return res
+
# def score_squared_deriv(theta, inds):
# res = 2 * np.mean(self._compute_score(psi_elements, theta, inds)) * \
# np.mean(self._compute_score_deriv(psi_elements, theta, inds))
# return res
- alt_coef_start, _, _ = fmin_l_bfgs_b(score_squared,
- self._coef_start_val,
- approx_grad=True,
- bounds=[self._coef_bounds])
+ alt_coef_start, _, _ = fmin_l_bfgs_b(
+ score_squared, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds]
+ )
signs_different, bracket_guess = _get_bracket_guess(score, alt_coef_start, self._coef_bounds)
if signs_different:
- root_res = root_scalar(score,
- bracket=bracket_guess,
- method='brentq')
+ root_res = root_scalar(score, bracket=bracket_guess, method="brentq")
theta_hat = root_res.root
else:
score_val_sign = np.sign(score(alt_coef_start))
if score_val_sign > 0:
theta_hat_array, score_val, _ = fmin_l_bfgs_b(
- score,
- self._coef_start_val,
- approx_grad=True,
- bounds=[self._coef_bounds])
+ score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds]
+ )
theta_hat = theta_hat_array.item()
- warnings.warn('Could not find a root of the score function.\n '
- f'Minimum score value found is {score_val} '
- f'for parameter theta equal to {theta_hat}.\n '
- 'No theta found such that the score function evaluates to a negative value.')
+ warnings.warn(
+ "Could not find a root of the score function.\n "
+ f"Minimum score value found is {score_val} "
+ f"for parameter theta equal to {theta_hat}.\n "
+ "No theta found such that the score function evaluates to a negative value."
+ )
else:
+
def neg_score(theta):
- res = - np.mean(self._compute_score(psi_elements, theta))
+ res = -np.mean(self._compute_score(psi_elements, theta))
return res
+
theta_hat_array, neg_score_val, _ = fmin_l_bfgs_b(
- neg_score,
- self._coef_start_val,
- approx_grad=True,
- bounds=[self._coef_bounds])
+ neg_score, self._coef_start_val, approx_grad=True, bounds=[self._coef_bounds]
+ )
theta_hat = theta_hat_array.item()
- warnings.warn('Could not find a root of the score function. '
- f'Maximum score value found is {-1*neg_score_val} '
- f'for parameter theta equal to {theta_hat}. '
- 'No theta found such that the score function evaluates to a positive value.')
+ warnings.warn(
+ "Could not find a root of the score function. "
+ f"Maximum score value found is {-1 * neg_score_val} "
+ f"for parameter theta equal to {theta_hat}. "
+ "No theta found such that the score function evaluates to a positive value."
+ )
return theta_hat
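# Minimal sketch of the two search strategies used above, on a toy score with
# root at theta = 1.5 (the mixin averages self._compute_score over the
# observations instead of using a closed-form function).
import numpy as np
from scipy.optimize import root_scalar

def score(theta):
    return np.tanh(theta - 1.5)

def score_deriv(theta):
    return 1.0 / np.cosh(theta - 1.5) ** 2

# unbounded coefficient: Newton's method from a start value with derivative
res_newton = root_scalar(score, x0=0.0, fprime=score_deriv, method="newton")
# bounded coefficient: Brent's method on a sign-changing bracket
res_brentq = root_scalar(score, bracket=[0.0, 5.0], method="brentq")
print(res_newton.root, res_newton.converged)
print(res_brentq.root, res_brentq.converged)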
diff --git a/doubleml/irm/__init__.py b/doubleml/irm/__init__.py
index 62ecccd0e..a48cfe35b 100644
--- a/doubleml/irm/__init__.py
+++ b/doubleml/irm/__init__.py
@@ -2,6 +2,4 @@
The :mod:`doubleml.irm` module implements double machine learning estimates based on interactive regression models.
"""
-__all__ = [
-
-]
+__all__ = []
diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 91e028d17..0a42da9a0 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -1,18 +1,21 @@
-import numpy as np
-import pandas as pd
import warnings
+import numpy as np
+import pandas as pd
from sklearn.utils import check_X_y
from ..double_ml import DoubleML
-
-from ..utils.blp import DoubleMLBLP
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _cond_targets, _trimm, \
- _normalize_ipw
-from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_finite_predictions, \
- _check_is_propensity, _check_binary_predictions
+from ..utils._checks import (
+ _check_binary_predictions,
+ _check_finite_predictions,
+ _check_is_propensity,
+ _check_score,
+ _check_trimming,
+ _check_weights,
+)
+from ..utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
+from ..utils.blp import DoubleMLBLP
class DoubleMLAPO(LinearScoreMixin, DoubleML):
@@ -75,31 +78,30 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML):
Default is ``True``.
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- treatment_level,
- n_folds=5,
- n_rep=1,
- score='APO',
- weights=None,
- normalize_ipw=False,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment_level,
+ n_folds=5,
+ n_rep=1,
+ score="APO",
+ weights=None,
+ normalize_ipw=False,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
# set up treatment level and check data
self._treatment_level = treatment_level
self._treated = self._dml_data.d == self._treatment_level
self._check_data(self._dml_data)
- valid_scores = ['APO']
+ valid_scores = ["APO"]
_check_score(self.score, valid_scores, allow_callable=False)
# set stratification for resampling
@@ -107,23 +109,26 @@ def __init__(self,
if draw_sample_splitting:
self.draw_sample_splitting()
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": ml_g, "ml_m": ml_m}
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict", "ml_m": "predict_proba"}
self._initialize_ml_nuisance_params()
self._normalize_ipw = normalize_ipw
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
@@ -178,105 +183,118 @@ def weights(self):
return self._weights
def _initialize_ml_nuisance_params(self):
- valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in valid_learner}
+ valid_learner = ["ml_g0", "ml_g1", "ml_m"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _initialize_weights(self, weights):
if weights is None:
weights = np.ones(self._dml_data.n_obs)
if isinstance(weights, np.ndarray):
- self._weights = {'weights': weights}
+ self._weights = {"weights": weights}
else:
assert isinstance(weights, dict)
self._weights = weights
def _get_weights(self):
# standard case for APO/ATE
- weights = self._weights['weights']
- if 'weights_bar' not in self._weights.keys():
- weights_bar = self._weights['weights']
+ weights = self._weights["weights"]
+ if "weights_bar" not in self._weights.keys():
+ weights_bar = self._weights["weights"]
else:
- weights_bar = self._weights['weights_bar'][:, self._i_rep]
+ weights_bar = self._weights["weights_bar"][:, self._i_rep]
return weights, weights_bar
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
# use the treated indicator to get the correct sample splits
- x, treated = check_X_y(x, self.treated,
- force_all_finite=False)
+ x, treated = check_X_y(x, self.treated, force_all_finite=False)
# get train indices for d == treatment_level
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated)
- g0_external = external_predictions['ml_g0'] is not None
- g1_external = external_predictions['ml_g1'] is not None
- m_external = external_predictions['ml_m'] is not None
+ g0_external = external_predictions["ml_g0"] is not None
+ g1_external = external_predictions["ml_g1"] is not None
+ m_external = external_predictions["ml_m"] is not None
# nuisance g (g0 only relevant for sensitivity analysis)
if g0_external:
# use external predictions
- g_hat0 = {'preds': external_predictions['ml_g0'],
- 'targets': _cond_targets(y, cond_sample=(treated == 0)),
- 'models': None}
+ g_hat0 = {
+ "preds": external_predictions["ml_g0"],
+ "targets": _cond_targets(y, cond_sample=(treated == 0)),
+ "models": None,
+ }
else:
- g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
- g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(treated == 0))
+ g_hat0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls)
+ g_hat0["targets"] = _cond_targets(g_hat0["targets"], cond_sample=(treated == 0))
if self._dml_data.binary_outcome:
- _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
+ _check_binary_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
if g1_external:
# use external predictions
- g_hat1 = {'preds': external_predictions['ml_g1'],
- 'targets': _cond_targets(y, cond_sample=(treated == 1)),
- 'models': None}
+ g_hat1 = {
+ "preds": external_predictions["ml_g1"],
+ "targets": _cond_targets(y, cond_sample=(treated == 1)),
+ "models": None,
+ }
else:
- g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(treated == 1))
+ g_hat1["targets"] = _cond_targets(g_hat1["targets"], cond_sample=(treated == 1))
if self._dml_data.binary_outcome:
- _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
+ _check_binary_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
# nuisance m
if m_external:
# use external predictions
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': treated,
- 'models': None}
+ m_hat = {"preds": external_predictions["ml_m"], "targets": treated, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, treated, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ treated,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
# also trim external predictions
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
-
- psi_a, psi_b = self._score_elements(y, treated, g_hat0['preds'], g_hat1['preds'],
- m_hat['preds'], smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
-
- preds = {'predictions': {'ml_g0': g_hat0['preds'],
- 'ml_g1': g_hat1['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g0': g_hat0['targets'],
- 'ml_g1': g_hat1['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g0': g_hat0['models'],
- 'ml_g1': g_hat1['models'],
- 'ml_m': m_hat['models']}
- }
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+ psi_a, psi_b = self._score_elements(y, treated, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+
+ preds = {
+ "predictions": {"ml_g0": g_hat0["preds"], "ml_g1": g_hat1["preds"], "ml_m": m_hat["preds"]},
+ "targets": {"ml_g0": g_hat0["targets"], "ml_g1": g_hat1["targets"], "ml_m": m_hat["targets"]},
+ "models": {"ml_g0": g_hat0["models"], "ml_g1": g_hat1["models"], "ml_m": m_hat["models"]},
+ }
return psi_elements, preds
def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls):
@@ -297,13 +315,13 @@ def _sensitivity_element_est(self, preds):
y = self._dml_data.y
treated = self.treated
- m_hat = preds['predictions']['ml_m']
- g_hat0 = preds['predictions']['ml_g0']
- g_hat1 = preds['predictions']['ml_g1']
+ m_hat = preds["predictions"]["ml_m"]
+ g_hat0 = preds["predictions"]["ml_g0"]
+ g_hat1 = preds["predictions"]["ml_g1"]
weights, weights_bar = self._get_weights()
- sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0-treated, g_hat0))
+ sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0 - treated, g_hat0))
sigma2 = np.mean(sigma2_score_element)
psi_sigma2 = sigma2_score_element - sigma2
@@ -315,77 +333,101 @@ def _sensitivity_element_est(self, preds):
nu2 = np.mean(nu2_score_element)
psi_nu2 = nu2_score_element - nu2
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2,
- 'riesz_rep': rr,
- }
+ element_dict = {
+ "sigma2": sigma2,
+ "nu2": nu2,
+ "psi_sigma2": psi_sigma2,
+ "psi_nu2": psi_nu2,
+ "riesz_rep": rr,
+ }
return element_dict
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, treated = check_X_y(x, self.treated,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, treated = check_X_y(x, self.treated, force_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_m": None}
train_inds = [train_index for (train_index, _) in smpls]
train_inds_d0 = [train_index for (train_index, _) in smpls_d0]
train_inds_d1 = [train_index for (train_index, _) in smpls_d1]
- g0_tune_res = _dml_tune(y, x, train_inds_d0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- g1_tune_res = _dml_tune(y, x, train_inds_d1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- m_tune_res = _dml_tune(treated, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ g1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ m_tune_res = _dml_tune(
+ treated,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g0_best_params = [xx.best_params_ for xx in g0_tune_res]
g1_best_params = [xx.best_params_ for xx in g1_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g0': g0_best_params,
- 'ml_g1': g1_best_params,
- 'ml_m': m_best_params}
- tune_res = {'g0_tune': g0_tune_res,
- 'g1_tune': g1_tune_res,
- 'm_tune': m_tune_res}
+ params = {"ml_g0": g0_best_params, "ml_g1": g1_best_params, "ml_m": m_best_params}
+ tune_res = {"g0_tune": g0_tune_res, "g1_tune": g1_tune_res, "m_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
def _check_data(self, obj_dml_data):
if len(obj_dml_data.d_cols) > 1:
- raise ValueError('Only one treatment variable is allowed. ' +
- f'Got {len(obj_dml_data.d_cols)} treatment variables.')
+ raise ValueError(
+ "Only one treatment variable is allowed. " + f"Got {len(obj_dml_data.d_cols)} treatment variables."
+ )
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s).')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s)."
+ )
# check if treatment level is valid
if np.sum(self.treated) < 5:
raise ValueError(
- 'The number of treated observations is less than 5. ' +
- f'Number of treated observations: {np.sum(self.treated)} for treatment level {self.treatment_level}.'
+ "The number of treated observations is less than 5. "
+ + f"Number of treated observations: {np.sum(self.treated)} for treatment level {self.treatment_level}."
)
if np.mean(self.treated) <= 0.05:
- warnings.warn(f'The proportion of observations with treatment level {self.treatment_level} is less than 5%.'
- f' Got {np.mean(self.treated) * 100:.2f}%.')
+ warnings.warn(
+ f"The proportion of observations with treatment level {self.treatment_level} is less than 5%."
+ f" Got {np.mean(self.treated) * 100:.2f}%."
+ )
return
@@ -411,17 +453,15 @@ def capo(self, basis, is_gate=False, **kwargs):
model : :class:`doubleml.DoubleMLBLP`
Best linear predictor model.
"""
- valid_score = ['APO']
+ valid_score = ["APO"]
if self.score not in valid_score:
- raise ValueError('Invalid score ' + self.score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + self.score + ". " + "Valid score " + " or ".join(valid_score) + ".")
if self.n_rep != 1:
- raise NotImplementedError('Only implemented for one repetition. ' +
- f'Number of repetitions is {str(self.n_rep)}.')
+ raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
# define the orthogonal signal
- orth_signal = self.psi_elements['psi_b'].reshape(-1)
+ orth_signal = self.psi_elements["psi_b"].reshape(-1)
# fit the best linear predictor
model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
model.fit(**kwargs)
@@ -447,18 +487,19 @@ def gapo(self, groups, **kwargs):
Best linear predictor model for group average potential outcomes.
"""
if not isinstance(groups, pd.DataFrame):
- raise TypeError('Groups must be of DataFrame type. '
- f'Groups of type {str(type(groups))} was passed.')
+ raise TypeError(f"Groups must be of DataFrame type. Groups of type {str(type(groups))} was passed.")
if not all(groups.dtypes == bool) or all(groups.dtypes == int):
if groups.shape[1] == 1:
- groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
+ groups = pd.get_dummies(groups, prefix="Group", prefix_sep="_")
else:
- raise TypeError('Columns of groups must be of bool type or int type (dummy coded). '
- 'Alternatively, groups should only contain one column.')
+ raise TypeError(
+ "Columns of groups must be of bool type or int type (dummy coded). "
+ "Alternatively, groups should only contain one column."
+ )
if any(groups.sum(0) <= 5):
- warnings.warn('At least one group effect is estimated with less than 6 observations.')
+ warnings.warn("At least one group effect is estimated with less than 6 observations.")
model = self.capo(groups, is_gate=True, **kwargs)
return model
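# Illustrative usage sketch (synthetic data; learner and column choices are
# free): fit the average potential outcome for treatment level 1, then group
# effects via gapo() as defined above.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLData
from doubleml.irm.apo import DoubleMLAPO

rng = np.random.default_rng(42)
n = 500
x = rng.normal(size=(n, 3))
d = rng.integers(0, 3, size=n)                  # discrete treatment levels 0, 1, 2
y = x[:, 0] + d + rng.normal(size=n)
df = pd.DataFrame(np.column_stack((x, d, y)), columns=["x1", "x2", "x3", "d", "y"])

dml_apo = DoubleMLAPO(
    DoubleMLData(df, y_col="y", d_cols="d"),
    ml_g=RandomForestRegressor(),
    ml_m=RandomForestClassifier(),
    treatment_level=1,
)
dml_apo.fit()
groups = pd.DataFrame({"group": x[:, 0] > 0})   # one boolean column, dummy coded
print(dml_apo.gapo(groups).confint())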
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 2a6b5ce1a..9fe5617db 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -1,40 +1,39 @@
-import numpy as np
-import pandas as pd
import copy
from collections.abc import Iterable
-from sklearn.base import clone
-
+import numpy as np
+import pandas as pd
from joblib import Parallel, delayed
+from sklearn.base import clone
from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData, DoubleMLClusterData
-from .apo import DoubleMLAPO
+from ..double_ml_data import DoubleMLClusterData, DoubleMLData
from ..double_ml_framework import concat
-
-from ..utils.resampling import DoubleMLResampling
+from ..utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_weights
from ..utils._descriptive import generate_summary
-from ..utils._checks import _check_score, _check_trimming, _check_weights, _check_sample_splitting
from ..utils.gain_statistics import gain_statistics
+from ..utils.resampling import DoubleMLResampling
+from .apo import DoubleMLAPO
class DoubleMLAPOS:
- """Double machine learning for interactive regression models with multiple discrete treatments.
- """
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- treatment_levels,
- n_folds=5,
- n_rep=1,
- score='APO',
- weights=None,
- normalize_ipw=False,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
-
+ """Double machine learning for interactive regression models with multiple discrete treatments."""
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment_levels,
+ n_folds=5,
+ n_rep=1,
+ score="APO",
+ weights=None,
+ normalize_ipw=False,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
self._dml_data = obj_dml_data
self._is_cluster_data = isinstance(obj_dml_data, DoubleMLClusterData)
self._check_data(self._dml_data)
@@ -52,7 +51,7 @@ def __init__(self,
# check score
self._score = score
- valid_scores = ['APO']
+ valid_scores = ["APO"]
_check_score(self.score, valid_scores, allow_callable=False)
# initialize framework which is constructed after the fit method is called
@@ -64,20 +63,23 @@ def __init__(self,
_check_trimming(self._trimming_rule, self._trimming_threshold)
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
- ml_g_is_classifier = DoubleML._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _ = DoubleML._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)}
+ ml_g_is_classifier = DoubleML._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _ = DoubleML._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": clone(ml_g), "ml_m": clone(ml_m)}
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict", "ml_m": "predict_proba"}
# APO weights
_check_weights(weights, score="ATE", n_obs=obj_dml_data.n_obs, n_rep=self.n_rep)
@@ -93,10 +95,9 @@ def __init__(self,
def __str__(self):
class_name = self.__class__.__name__
- header = f'================== {class_name} Object ==================\n'
+ header = f"================== {class_name} Object ==================\n"
fit_summary = str(self.summary)
- res = header + \
- '\n------------------ Fit summary ------------------\n' + fit_summary
+ res = header + "\n------------------ Fit summary ------------------\n" + fit_summary
return res
@property
@@ -258,8 +259,10 @@ def smpls(self):
The partition used for cross-fitting.
"""
if self._smpls is None:
- err_msg = ('Sample splitting not specified. Draw samples via .draw_sample splitting(). ' +
- 'External samples not implemented yet.')
+ err_msg = (
+ "Sample splitting not specified. Draw samples via .draw_sample splitting(). "
+ + "External samples not implemented yet."
+ )
raise ValueError(err_msg)
return self._smpls
@@ -321,12 +324,11 @@ def summary(self):
A summary for the estimated causal effect after calling :meth:`fit`.
"""
if self.framework is None:
- col_names = ['coef', 'std err', 't', 'P>|t|']
+ col_names = ["coef", "std err", "t", "P>|t|"]
df_summary = pd.DataFrame(columns=col_names)
else:
ci = self.confint()
- df_summary = generate_summary(self.coef, self.se, self.t_stat,
- self.pval, ci, self._treatment_levels)
+ df_summary = generate_summary(self.coef, self.se, self.t_stat, self.pval, ci, self._treatment_levels)
return df_summary
@property
@@ -340,7 +342,7 @@ def sensitivity_summary(self):
Summary for the sensitivity analysis.
"""
if self._framework is None:
- raise ValueError('Apply sensitivity_analysis() before sensitivity_summary.')
+ raise ValueError("Apply sensitivity_analysis() before sensitivity_summary.")
else:
sensitivity_summary = self._framework.sensitivity_summary
return sensitivity_summary
@@ -387,14 +389,9 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
ext_pred_dict = None
# parallel estimation of the models
- parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs')
+ parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch="2*n_jobs")
fitted_models = parallel(
- delayed(self._fit_model)(
- i_level,
- n_jobs_cv,
- store_predictions,
- store_models,
- ext_pred_dict)
+ delayed(self._fit_model)(i_level, n_jobs_cv, store_predictions, store_models, ext_pred_dict)
for i_level in range(self.n_treatment_levels)
)
@@ -431,14 +428,14 @@ def confint(self, joint=False, level=0.95):
"""
if self.framework is None:
- raise ValueError('Apply fit() before confint().')
+ raise ValueError("Apply fit() before confint().")
df_ci = self.framework.confint(joint=joint, level=level)
df_ci.set_index(pd.Index(self._treatment_levels), inplace=True)
return df_ci
- def bootstrap(self, method='normal', n_rep_boot=500):
+ def bootstrap(self, method="normal", n_rep_boot=500):
"""
Multiplier bootstrap for DoubleML models.
@@ -456,7 +453,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
self : object
"""
if self._framework is None:
- raise ValueError('Apply fit() before bootstrap().')
+ raise ValueError("Apply fit() before bootstrap().")
self._framework.bootstrap(method=method, n_rep_boot=n_rep_boot)
return self
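# Illustrative usage sketch (synthetic data; learners are free choices): fit
# all treatment levels, then joint confidence intervals, which require a
# preceding multiplier bootstrap as enforced above.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLData
from doubleml.irm.apos import DoubleMLAPOS

rng = np.random.default_rng(42)
n = 500
x = rng.normal(size=(n, 3))
d = rng.integers(0, 3, size=n)
y = x[:, 0] + d + rng.normal(size=n)
df = pd.DataFrame(np.column_stack((x, d, y)), columns=["x1", "x2", "x3", "d", "y"])

dml_apos = DoubleMLAPOS(
    DoubleMLData(df, y_col="y", d_cols="d"),
    ml_g=RandomForestRegressor(),
    ml_m=RandomForestClassifier(),
    treatment_levels=[0, 1, 2],
)
dml_apos.fit()
dml_apos.bootstrap(method="normal", n_rep_boot=500)
print(dml_apos.confint(joint=True, level=0.95))  # rows indexed by treatment level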
@@ -499,19 +496,24 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h
"""
if self._framework is None:
- raise ValueError('Apply fit() before sensitivity_analysis().')
- self._framework.sensitivity_analysis(
- cf_y=cf_y,
- cf_d=cf_d,
- rho=rho,
- level=level,
- null_hypothesis=null_hypothesis
- )
+ raise ValueError("Apply fit() before sensitivity_analysis().")
+ self._framework.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level, null_hypothesis=null_hypothesis)
return self
- def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0,
- include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100):
+ def sensitivity_plot(
+ self,
+ idx_treatment=0,
+ value="theta",
+ rho=1.0,
+ level=0.95,
+ null_hypothesis=0.0,
+ include_scenario=True,
+ benchmarks=None,
+ fill=True,
+ grid_bounds=(0.15, 0.15),
+ grid_size=100,
+ ):
"""
Contour plot of the sensitivity with respect to latent/confounding variables.
@@ -565,7 +567,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
Plotly figure of the sensitivity contours.
"""
if self._framework is None:
- raise ValueError('Apply fit() before sensitivity_plot().')
+ raise ValueError("Apply fit() before sensitivity_plot().")
fig = self._framework.sensitivity_plot(
idx_treatment=idx_treatment,
value=value,
@@ -576,7 +578,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95,
benchmarks=benchmarks,
fill=fill,
grid_bounds=grid_bounds,
- grid_size=grid_size
+ grid_size=grid_size,
)
return fig
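# Sketch continuing from a fitted dml_apos (see the sketch further above): run
# the sensitivity analysis, benchmark one observed confounder, and draw the
# contour plot for the first treatment level.
dml_apos.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
print(dml_apos.sensitivity_summary)
print(dml_apos.sensitivity_benchmark(benchmarking_set=["x1"]))
fig = dml_apos.sensitivity_plot(idx_treatment=0, value="theta", fill=True)
fig.show()  # plotly contour figure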
@@ -594,18 +596,20 @@ def sensitivity_benchmark(self, benchmarking_set, fit_args=None):
# input checks
if self.sensitivity_elements is None:
- raise NotImplementedError(f'Sensitivity analysis not yet implemented for {self.__class__.__name__}.')
+ raise NotImplementedError(f"Sensitivity analysis not yet implemented for {self.__class__.__name__}.")
if not isinstance(benchmarking_set, list):
- raise TypeError('benchmarking_set must be a list. '
- f'{str(benchmarking_set)} of type {type(benchmarking_set)} was passed.')
+ raise TypeError(
+ f"benchmarking_set must be a list. {str(benchmarking_set)} of type {type(benchmarking_set)} was passed."
+ )
if len(benchmarking_set) == 0:
- raise ValueError('benchmarking_set must not be empty.')
+ raise ValueError("benchmarking_set must not be empty.")
if not set(benchmarking_set) <= set(x_list_long):
- raise ValueError(f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
- f'{str(benchmarking_set)} was passed.')
+ raise ValueError(
+ f"benchmarking_set must be a subset of features {str(self._dml_data.x_cols)}. "
+ f"{str(benchmarking_set)} was passed."
+ )
if fit_args is not None and not isinstance(fit_args, dict):
- raise TypeError('fit_args must be a dict. '
- f'{str(fit_args)} of type {type(fit_args)} was passed.')
+ raise TypeError(f"fit_args must be a dict. {str(fit_args)} of type {type(fit_args)} was passed.")
# refit short form of the model
x_list_short = [x for x in x_list_long if x not in benchmarking_set]
@@ -631,10 +635,9 @@ def draw_sample_splitting(self):
-------
self : object
"""
- obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds,
- n_rep=self.n_rep,
- n_obs=self._dml_data.n_obs,
- stratify=self._dml_data.d)
+ obj_dml_resampling = DoubleMLResampling(
+ n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._dml_data.n_obs, stratify=self._dml_data.d
+ )
self._smpls = obj_dml_resampling.split_samples()
return self
@@ -690,7 +693,8 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
>>> dml_plr_obj.set_sample_splitting(smpls)
"""
self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
- all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data)
+ all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data
+ )
self._modellist = self._initialize_models()
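# Sketch (mirroring the docstring example above; reuses np and n from the
# earlier sketch): externally generated folds as a list of (train_ids,
# test_ids) pairs, synchronized across all treatment-level models.
from sklearn.model_selection import KFold

smpls = list(KFold(n_splits=5, shuffle=True, random_state=42).split(np.zeros(n)))
dml_apos.set_sample_splitting(smpls)  # also re-initializes the per-level models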
@@ -716,16 +720,18 @@ def causal_contrast(self, reference_levels):
"""
if self.framework is None:
- raise ValueError('Apply fit() before causal_contrast().')
+ raise ValueError("Apply fit() before causal_contrast().")
if self.n_treatment_levels == 1:
- raise ValueError('Only one treatment level. No causal contrast can be computed.')
+ raise ValueError("Only one treatment level. No causal contrast can be computed.")
is_iterable = isinstance(reference_levels, Iterable)
if not is_iterable:
reference_levels = [reference_levels]
is_treatment_level_subset = set(reference_levels).issubset(set(self.treatment_levels))
if not is_treatment_level_subset:
- raise ValueError('Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or '
- 'a single treatment level.')
+ raise ValueError(
+ "Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or "
+ "a single treatment level."
+ )
skip_index = []
all_treatment_names = []
@@ -735,24 +741,31 @@ def causal_contrast(self, reference_levels):
ref_framework = self.modellist[i_ref_lvl].framework
skip_index += [i_ref_lvl]
- all_acc_frameworks += [model.framework - ref_framework for i, model in
- enumerate(self.modellist) if i not in skip_index]
- all_treatment_names += [f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}" for
- i in range(self.n_treatment_levels) if i not in skip_index]
+ all_acc_frameworks += [
+ model.framework - ref_framework for i, model in enumerate(self.modellist) if i not in skip_index
+ ]
+ all_treatment_names += [
+ f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"
+ for i in range(self.n_treatment_levels)
+ if i not in skip_index
+ ]
acc = concat(all_acc_frameworks)
acc.treatment_names = all_treatment_names
return acc
def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions_dict=None):
-
model = self.modellist[i_level]
if external_predictions_dict is not None:
external_predictions = external_predictions_dict[self.treatment_levels[i_level]]
else:
external_predictions = None
- model.fit(n_jobs_cv=n_jobs_cv, store_predictions=store_predictions, store_models=store_models,
- external_predictions=external_predictions)
+ model.fit(
+ n_jobs_cv=n_jobs_cv,
+ store_predictions=store_predictions,
+ store_models=store_models,
+ external_predictions=external_predictions,
+ )
return model
def _check_treatment_levels(self, treatment_levels):
@@ -763,36 +776,44 @@ def _check_treatment_levels(self, treatment_levels):
treatment_level_list = [t_lvl for t_lvl in treatment_levels]
is_d_subset = set(treatment_level_list).issubset(set(self._all_treatment_levels))
if not is_d_subset:
- raise ValueError('Invalid reference_levels. reference_levels has to be an iterable subset or '
- 'a single element of the unique treatment levels in the data.')
+ raise ValueError(
+ "Invalid reference_levels. reference_levels has to be an iterable subset or "
+ "a single element of the unique treatment levels in the data."
+ )
return treatment_level_list
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData or DoubleMLClusterData type.')
+ raise TypeError("The data must be of DoubleMLData or DoubleMLClusterData type.")
if obj_dml_data.z is not None:
- raise ValueError('The data must not contain instrumental variables.')
+ raise ValueError("The data must not contain instrumental variables.")
return
def _check_external_predictions(self, external_predictions):
expected_keys = self.treatment_levels
if not isinstance(external_predictions, dict):
- raise TypeError('external_predictions must be a dictionary. ' +
- f'Object of type {type(external_predictions)} passed.')
+ raise TypeError(
+ "external_predictions must be a dictionary. " + f"Object of type {type(external_predictions)} passed."
+ )
if not set(external_predictions.keys()).issubset(set(expected_keys)):
- raise ValueError('external_predictions must be a subset of all treatment levels. ' +
- f'Expected keys: {set(expected_keys)}. ' +
- f'Passed keys: {set(external_predictions.keys())}.')
+ raise ValueError(
+ "external_predictions must be a subset of all treatment levels. "
+ + f"Expected keys: {set(expected_keys)}. "
+ + f"Passed keys: {set(external_predictions.keys())}."
+ )
- expected_learner_keys = ['ml_g0', 'ml_g1', 'ml_m']
+ expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"]
for key, value in external_predictions.items():
if not isinstance(value, dict):
- raise TypeError(f'external_predictions[{key}] must be a dictionary. ' +
- f'Object of type {type(value)} passed.')
+ raise TypeError(
+ f"external_predictions[{key}] must be a dictionary. " + f"Object of type {type(value)} passed."
+ )
if not set(value.keys()).issubset(set(expected_learner_keys)):
- raise ValueError(f'external_predictions[{key}] must be a subset of {set(expected_learner_keys)}. ' +
- f'Passed keys: {set(value.keys())}.')
+ raise ValueError(
+ f"external_predictions[{key}] must be a subset of {set(expected_learner_keys)}. "
+ + f"Passed keys: {set(value.keys())}."
+ )
return
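# Sketch of the nested mapping validated above: outer keys are treatment
# levels, inner keys a subset of {"ml_g0", "ml_g1", "ml_m"} holding
# per-observation prediction arrays. Passing it to fit() via an
# external_predictions argument is an assumption based on _fit_model's
# external_predictions_dict parameter.
import numpy as np

n_obs = 500
external_predictions = {
    1: {
        "ml_m": np.full(n_obs, 1 / 3),  # P(d == 1 | x)
        "ml_g1": np.zeros(n_obs),       # E[y | d == 1, x]
        "ml_g0": np.zeros(n_obs),       # E[y | d != 1, x]
    }
}
# dml_apos.fit(external_predictions=external_predictions)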
@@ -801,11 +822,11 @@ def _rename_external_predictions(self, external_predictions):
ext_pred_dict = {treatment_level: {d_col: {}} for treatment_level in self.treatment_levels}
for treatment_level in self.treatment_levels:
if "ml_g1" in external_predictions[treatment_level]:
- ext_pred_dict[treatment_level][d_col]['ml_g1'] = external_predictions[treatment_level]['ml_g1']
+ ext_pred_dict[treatment_level][d_col]["ml_g1"] = external_predictions[treatment_level]["ml_g1"]
if "ml_m" in external_predictions[treatment_level]:
- ext_pred_dict[treatment_level][d_col]['ml_m'] = external_predictions[treatment_level]['ml_m']
+ ext_pred_dict[treatment_level][d_col]["ml_m"] = external_predictions[treatment_level]["ml_m"]
if "ml_g0" in external_predictions[treatment_level]:
- ext_pred_dict[treatment_level][d_col]['ml_g0'] = external_predictions[treatment_level]['ml_g0']
+ ext_pred_dict[treatment_level][d_col]["ml_g0"] = external_predictions[treatment_level]["ml_g0"]
return ext_pred_dict
@@ -821,24 +842,21 @@ def _initialize_weights(self, weights):
def _initialize_models(self):
modellist = [None] * self.n_treatment_levels
kwargs = {
- 'obj_dml_data': self._dml_data,
- 'ml_g': self._learner['ml_g'],
- 'ml_m': self._learner['ml_m'],
- 'score': self.score,
- 'n_folds': self.n_folds,
- 'n_rep': self.n_rep,
- 'weights': self.weights,
- 'trimming_rule': self.trimming_rule,
- 'trimming_threshold': self.trimming_threshold,
- 'normalize_ipw': self.normalize_ipw,
- 'draw_sample_splitting': False
+ "obj_dml_data": self._dml_data,
+ "ml_g": self._learner["ml_g"],
+ "ml_m": self._learner["ml_m"],
+ "score": self.score,
+ "n_folds": self.n_folds,
+ "n_rep": self.n_rep,
+ "weights": self.weights,
+ "trimming_rule": self.trimming_rule,
+ "trimming_threshold": self.trimming_threshold,
+ "normalize_ipw": self.normalize_ipw,
+ "draw_sample_splitting": False,
}
for i_level in range(self.n_treatment_levels):
# initialize models for all levels
- model = DoubleMLAPO(
- treatment_level=self._treatment_levels[i_level],
- **kwargs
- )
+ model = DoubleMLAPO(treatment_level=self._treatment_levels[i_level], **kwargs)
# synchronize the sample splitting
model.set_sample_splitting(all_smpls=self.smpls)
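# Sketch continuing from a fitted dml_apos: average causal contrasts against a
# reference level, returned as a concatenated DoubleMLFramework with labelled
# treatment_names.
contrast = dml_apos.causal_contrast(reference_levels=0)
print(contrast.treatment_names)  # e.g. ['1 vs 0', '2 vs 0']
print(contrast.confint(level=0.95))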
diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py
index c92cb9657..6190b0789 100644
--- a/doubleml/irm/cvar.py
+++ b/doubleml/irm/cvar.py
@@ -1,15 +1,29 @@
import numpy as np
from sklearn.base import clone
-from sklearn.utils import check_X_y
from sklearn.model_selection import StratifiedKFold, train_test_split
+from sklearn.utils import check_X_y
from ..double_ml import DoubleML
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._estimation import _dml_cv_predict, _trimm, _predict_zero_one_propensity, \
- _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score, _cond_targets
from ..double_ml_data import DoubleMLData
-from ..utils._checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \
- _check_contains_iv, _check_quantile
+from ..double_ml_score_mixins import LinearScoreMixin
+from ..utils._checks import (
+ _check_contains_iv,
+ _check_quantile,
+ _check_score,
+ _check_treatment,
+ _check_trimming,
+ _check_zero_one_treatment,
+)
+from ..utils._estimation import (
+ _cond_targets,
+ _dml_cv_predict,
+ _dml_tune,
+ _get_bracket_guess,
+ _normalize_ipw,
+ _predict_zero_one_propensity,
+ _solve_ipw_score,
+ _trimm,
+)
class DoubleMLCVAR(LinearScoreMixin, DoubleML):
@@ -82,38 +96,37 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
d 1.591441 0.095781 16.615498 5.382582e-62 1.403715 1.779167
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- treatment=1,
- quantile=0.5,
- n_folds=5,
- n_rep=1,
- score='CVaR',
- normalize_ipw=True,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment=1,
+ quantile=0.5,
+ n_folds=5,
+ n_rep=1,
+ score="CVaR",
+ normalize_ipw=True,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._quantile = quantile
self._treatment = treatment
self._normalize_ipw = normalize_ipw
self._check_data(self._dml_data)
- valid_score = ['CVaR']
+ valid_score = ["CVaR"]
_check_score(self.score, valid_score, allow_callable=False)
_check_quantile(self.quantile)
_check_treatment(self.treatment)
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
# initialize starting values and bounds
self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max())
@@ -130,10 +143,10 @@ def __init__(self,
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
- _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)}
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+ _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False)
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": clone(ml_g), "ml_m": clone(ml_m)}
+ self._predict_method = {"ml_g": "predict", "ml_m": "predict_proba"}
self._initialize_ml_nuisance_params()
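# Illustrative usage sketch (synthetic binary-treatment data; learners are
# free choices): CVaR of the potential outcome Y(1) at the median, matching
# the defaults treatment=1, quantile=0.5 above.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLData
from doubleml.irm.cvar import DoubleMLCVAR

rng = np.random.default_rng(3141)
n = 1000
x = rng.normal(size=(n, 4))
d = rng.binomial(1, 0.5, size=n)
y = d * x[:, 0] + rng.normal(size=n)
df = pd.DataFrame(np.column_stack((x, d, y)), columns=["x1", "x2", "x3", "x4", "d", "y"])

dml_cvar = DoubleMLCVAR(
    DoubleMLData(df, y_col="y", d_cols="d"),
    ml_g=RandomForestRegressor(),
    ml_m=RandomForestClassifier(),
    treatment=1,
    quantile=0.5,
)
print(dml_cvar.fit().summary)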
@@ -187,24 +200,23 @@ def _score_elements(self, y, d, g_hat, m_hat, pq_est):
return psi_a, psi_b
def _initialize_ml_nuisance_params(self):
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in ['ml_g', 'ml_m']}
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]}
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# initialize nuisance predictions, targets and models
- g_hat = {'models': None,
- 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
- 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan)
- }
- m_hat = {'models': None,
- 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
- 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan)
- }
+ g_hat = {
+ "models": None,
+ "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ }
+ m_hat = {
+ "models": None,
+ "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ }
# initialize models
fitted_models = {}
@@ -212,8 +224,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
# set nuisance model parameters
est_params = self._get_params(learner)
if est_params is not None:
- fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold])
- for i_fold in range(self.n_folds)]
+ fitted_models[learner] = [
+ clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds)
+ ]
else:
fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)]
@@ -224,19 +237,21 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
test_inds = smpls[i_fold][1]
# start nested cross-fitting
- train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
- random_state=42, stratify=d[train_inds])
- smpls_prelim = [(train, test) for train, test in
- StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])]
+ train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, random_state=42, stratify=d[train_inds])
+ smpls_prelim = [
+ (train, test)
+ for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])
+ ]
d_train_1 = d[train_inds_1]
y_train_1 = y[train_inds_1]
x_train_1 = x[train_inds_1, :]
# get a copy of ml_m as a preliminary learner
- ml_m_prelim = clone(fitted_models['ml_m'][i_fold])
- m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1,
- method='predict_proba', smpls=smpls_prelim)['preds']
+ ml_m_prelim = clone(fitted_models["ml_m"][i_fold])
+ m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)[
+ "preds"
+ ]
m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold)
@@ -268,62 +283,57 @@ def ipw_score(theta):
# only consider values with the right treatment status and fit the model
dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :]
g_target_train_2_d = g_target_train_2[d_train_2 == self.treatment]
- fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, g_target_train_2_d)
+ fitted_models["ml_g"][i_fold].fit(dx_treat_train_2, g_target_train_2_d)
# predict nuisance values on the test data and the corresponding targets
- g_hat['preds'][test_inds] = fitted_models['ml_g'][i_fold].predict(x_test)
- g_hat['targets'][test_inds] = g_target[test_inds]
+ g_hat["preds"][test_inds] = fitted_models["ml_g"][i_fold].predict(x_test)
+ g_hat["targets"][test_inds] = g_target[test_inds]
# refit the propensity score on the whole training set
- fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds])
- m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x_test)
+ fitted_models["ml_m"][i_fold].fit(x[train_inds, :], d[train_inds])
+ m_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m"][i_fold], x_test)
# set target for propensity score
- m_hat['targets'] = d
+ m_hat["targets"] = d
# set the target for g to be a float and keep only the relevant values
- g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment))
+ g_hat["targets"] = _cond_targets(g_hat["targets"], cond_sample=(d == self.treatment))
if return_models:
- g_hat['models'] = fitted_models['ml_g']
- m_hat['models'] = fitted_models['ml_m']
+ g_hat["models"] = fitted_models["ml_g"]
+ m_hat["models"] = fitted_models["ml_m"]
# clip propensities and normalize ipw weights
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
# this is not done in the score to be equivalent to PQ models
if self._normalize_ipw:
- m_hat_adj = _normalize_ipw(m_hat['preds'], d)
+ m_hat_adj = _normalize_ipw(m_hat["preds"], d)
else:
- m_hat_adj = m_hat['preds']
+ m_hat_adj = m_hat["preds"]
if self.treatment == 0:
m_hat_adj = 1 - m_hat_adj
# use the average of the ipw estimates to approximate the potential quantile for U (p.4 Kallus et al.)
pq_est = np.mean(ipw_vec)
- psi_a, psi_b = self._score_elements(y, d, g_hat['preds'], m_hat_adj, pq_est)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_g': g_hat['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g': g_hat['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g': g_hat['models'],
- 'ml_m': m_hat['models']}
- }
+ psi_a, psi_b = self._score_elements(y, d, g_hat["preds"], m_hat_adj, pq_est)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_g": g_hat["preds"], "ml_m": m_hat["preds"]},
+ "targets": {"ml_g": g_hat["targets"], "ml_m": m_hat["targets"]},
+ "models": {"ml_g": g_hat["models"], "ml_m": m_hat["models"]},
+ }
return psi_elements, preds
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_m": None}
train_inds = [train_index for (train_index, _) in smpls]
train_inds_treat = [np.intersect1d(np.where(d == self.treatment)[0], train) for train, _ in smpls]
@@ -333,31 +343,47 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
g_target_1 = np.ones_like(y) * quantile_approx
g_target_2 = (y - self.quantile * quantile_approx) / (1 - self.quantile)
g_target_approx = np.max(np.column_stack((g_target_1, g_target_2)), 1)
- g_tune_res = _dml_tune(g_target_approx, x, train_inds_treat,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g_tune_res = _dml_tune(
+ g_target_approx,
+ x,
+ train_inds_treat,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g_best_params = [xx.best_params_ for xx in g_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g': g_best_params,
- 'ml_m': m_best_params}
- tune_res = {'g_tune': g_tune_res,
- 'm_tune': m_tune_res}
+ params = {"ml_g": g_best_params, "ml_m": m_best_params}
+ tune_res = {"g_tune": g_tune_res, "m_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
_check_contains_iv(obj_dml_data)
_check_zero_one_treatment(self)
return
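
The nested cross-fitting above halves each training fold with a treatment-stratified split and then draws preliminary folds on the first half. A minimal stand-alone sketch of that split pattern, with hypothetical `d` and `train_inds` arrays in place of the class attributes and `n_splits=5` in place of `self.n_folds`:

```python
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split

rng = np.random.default_rng(0)
d = rng.integers(0, 2, size=200)   # hypothetical binary treatment indicator
train_inds = np.arange(150)        # hypothetical training indices of one fold

# halve the training fold while preserving the treatment shares
train_inds_1, train_inds_2 = train_test_split(
    train_inds, test_size=0.5, random_state=42, stratify=d[train_inds]
)
# draw preliminary folds on the first half, again stratified by treatment
smpls_prelim = [
    (train, test)
    for train, test in StratifiedKFold(n_splits=5).split(X=train_inds_1, y=d[train_inds_1])
]
print(len(train_inds_1), len(train_inds_2), len(smpls_prelim))  # 75 75 5
```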
diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py
index c2f85dd4d..e9dbf8ff1 100644
--- a/doubleml/irm/iivm.py
+++ b/doubleml/irm/iivm.py
@@ -5,10 +5,14 @@
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw
-from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, \
- _check_binary_predictions
+from ..utils._checks import (
+ _check_binary_predictions,
+ _check_finite_predictions,
+ _check_is_propensity,
+ _check_score,
+ _check_trimming,
+)
+from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
class DoubleMLIIVM(LinearScoreMixin, DoubleML):
@@ -117,27 +121,26 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
\\theta_0 = \\frac{\\mathbb{E}[g_0(1, X)] - \\mathbb{E}[g_0(0,X)]}{\\mathbb{E}[r_0(1, X)] - \\mathbb{E}[r_0(0,X)]}.
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- ml_r,
- n_folds=5,
- n_rep=1,
- score='LATE',
- subgroups=None,
- normalize_ipw=False,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ ml_r,
+ n_folds=5,
+ n_rep=1,
+ score="LATE",
+ subgroups=None,
+ normalize_ipw=False,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
- valid_scores = ['LATE']
+ valid_scores = ["LATE"]
_check_score(self.score, valid_scores, allow_callable=True)
# set stratification for resampling
@@ -145,45 +148,50 @@ def __init__(self,
if draw_sample_splitting:
self.draw_sample_splitting()
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- _ = self._check_learner(ml_r, 'ml_r', regressor=False, classifier=True)
- self._learner = {'ml_g': ml_g, 'ml_m': ml_m, 'ml_r': ml_r}
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ _ = self._check_learner(ml_r, "ml_r", regressor=False, classifier=True)
+ self._learner = {"ml_g": ml_g, "ml_m": ml_m, "ml_r": ml_r}
self._normalize_ipw = normalize_ipw
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba', 'ml_r': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba", "ml_r": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba', 'ml_r': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict", "ml_m": "predict_proba", "ml_r": "predict_proba"}
self._initialize_ml_nuisance_params()
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
if subgroups is None:
# this is the default for subgroups; via None to prevent a mutable default argument
- subgroups = {'always_takers': True, 'never_takers': True}
+ subgroups = {"always_takers": True, "never_takers": True}
else:
if not isinstance(subgroups, dict):
- raise TypeError('Invalid subgroups ' + str(subgroups) + '. ' +
- 'subgroups must be of type dictionary.')
- if (not all(k in subgroups for k in ['always_takers', 'never_takers']))\
- | (not all(k in ['always_takers', 'never_takers'] for k in subgroups)):
- raise ValueError('Invalid subgroups ' + str(subgroups) + '. ' +
- 'subgroups must be a dictionary with keys always_takers and never_takers.')
- if not isinstance(subgroups['always_takers'], bool):
- raise TypeError("subgroups['always_takers'] must be True or False. "
- f'Got {str(subgroups["always_takers"])}.')
- if not isinstance(subgroups['never_takers'], bool):
- raise TypeError("subgroups['never_takers'] must be True or False. "
- f'Got {str(subgroups["never_takers"])}.')
+ raise TypeError("Invalid subgroups " + str(subgroups) + ". " + "subgroups must be of type dictionary.")
+ if (not all(k in subgroups for k in ["always_takers", "never_takers"])) | (
+ not all(k in ["always_takers", "never_takers"] for k in subgroups)
+ ):
+ raise ValueError(
+ "Invalid subgroups "
+ + str(subgroups)
+ + ". "
+ + "subgroups must be a dictionary with keys always_takers and never_takers."
+ )
+ if not isinstance(subgroups["always_takers"], bool):
+ raise TypeError(f"subgroups['always_takers'] must be True or False. Got {str(subgroups['always_takers'])}.")
+ if not isinstance(subgroups["never_takers"], bool):
+ raise TypeError(f"subgroups['never_takers'] must be True or False. Got {str(subgroups['never_takers'])}.")
self.subgroups = subgroups
self._external_predictions_implemented = True
@@ -209,29 +217,33 @@ def trimming_threshold(self):
return self._trimming_threshold
def _initialize_ml_nuisance_params(self):
- valid_learner = ['ml_g0', 'ml_g1', 'ml_m', 'ml_r0', 'ml_r1']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in valid_learner}
+ valid_learner = ["ml_g0", "ml_g1", "ml_m", "ml_r0", "ml_r1"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
- one_treat = (obj_dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
+ one_treat = obj_dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml_data.d) == "binary"
zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- 'To fit an IIVM model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
- one_instr = (obj_dml_data.n_instr == 1)
- err_msg = ('Incompatible data. '
- 'To fit an IIVM model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as instrumental variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an IIVM model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
+ one_instr = obj_dml_data.n_instr == 1
+ err_msg = (
+ "Incompatible data. "
+ "To fit an IIVM model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as instrumental variable."
+ )
if one_instr:
- binary_instr = (type_of_target(obj_dml_data.z) == 'binary')
+ binary_instr = type_of_target(obj_dml_data.z) == "binary"
zero_one_instr = np.all((np.power(obj_dml_data.z, 2) - obj_dml_data.z) == 0)
if not (one_instr & binary_instr & zero_one_instr):
raise ValueError(err_msg)
@@ -240,123 +252,151 @@ def _check_data(self, obj_dml_data):
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, z = check_X_y(x, np.ravel(self._dml_data.z),
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# get train indices for z == 0 and z == 1
smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z)
# nuisance g
- if external_predictions['ml_g0'] is not None:
- g_hat0 = {'preds': external_predictions['ml_g0'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g0"] is not None:
+ g_hat0 = {"preds": external_predictions["ml_g0"], "targets": None, "models": None}
else:
- g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_z0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat0['targets'] = g_hat0['targets'].astype(float)
- g_hat0['targets'][z == 1] = np.nan
+ g_hat0["targets"] = g_hat0["targets"].astype(float)
+ g_hat0["targets"][z == 1] = np.nan
if self._dml_data.binary_outcome:
- _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
- _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12)
+ _check_binary_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
+ _check_is_propensity(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls, eps=1e-12)
- if external_predictions['ml_g1'] is not None:
- g_hat1 = {'preds': external_predictions['ml_g1'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g1"] is not None:
+ g_hat1 = {"preds": external_predictions["ml_g1"], "targets": None, "models": None}
else:
- g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_z1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat1['targets'] = g_hat1['targets'].astype(float)
- g_hat1['targets'][z == 0] = np.nan
+ g_hat1["targets"] = g_hat1["targets"].astype(float)
+ g_hat1["targets"][z == 0] = np.nan
if self._dml_data.binary_outcome:
- _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
- _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12)
+ _check_binary_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
+ _check_is_propensity(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls, eps=1e-12)
# nuisance m
- if external_predictions['ml_m'] is not None:
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_m"] is not None:
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ z,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
# also trim external predictions
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
# nuisance r
- r0 = external_predictions['ml_r0'] is not None
- if self.subgroups['always_takers']:
+ r0 = external_predictions["ml_r0"] is not None
+ if self.subgroups["always_takers"]:
if r0:
- r_hat0 = {'preds': external_predictions['ml_r0'],
- 'targets': None,
- 'models': None}
+ r_hat0 = {"preds": external_predictions["ml_r0"], "targets": None, "models": None}
else:
- r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'],
- return_models=return_models)
+ r_hat0 = _dml_cv_predict(
+ self._learner["ml_r"],
+ x,
+ d,
+ smpls=smpls_z0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_r0"),
+ method=self._predict_method["ml_r"],
+ return_models=return_models,
+ )
else:
- r_hat0 = {'preds': np.zeros_like(d), 'targets': np.zeros_like(d), 'models': None}
+ r_hat0 = {"preds": np.zeros_like(d), "targets": np.zeros_like(d), "models": None}
if not r0:
- _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls)
+ _check_finite_predictions(r_hat0["preds"], self._learner["ml_r"], "ml_r", smpls)
# adjust target values to consider only compatible subsamples
- r_hat0['targets'] = r_hat0['targets'].astype(float)
- r_hat0['targets'][z == 1] = np.nan
+ r_hat0["targets"] = r_hat0["targets"].astype(float)
+ r_hat0["targets"][z == 1] = np.nan
- r1 = external_predictions['ml_r1'] is not None
- if self.subgroups['never_takers']:
+ r1 = external_predictions["ml_r1"] is not None
+ if self.subgroups["never_takers"]:
if r1:
- r_hat1 = {'preds': external_predictions['ml_r1'],
- 'targets': None,
- 'models': None}
+ r_hat1 = {"preds": external_predictions["ml_r1"], "targets": None, "models": None}
else:
- r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'],
- return_models=return_models)
+ r_hat1 = _dml_cv_predict(
+ self._learner["ml_r"],
+ x,
+ d,
+ smpls=smpls_z1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_r1"),
+ method=self._predict_method["ml_r"],
+ return_models=return_models,
+ )
else:
- r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None}
+ r_hat1 = {"preds": np.ones_like(d), "targets": np.ones_like(d), "models": None}
if not r1:
- _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls)
+ _check_finite_predictions(r_hat1["preds"], self._learner["ml_r"], "ml_r", smpls)
# adjust target values to consider only compatible subsamples
- r_hat1['targets'] = r_hat1['targets'].astype(float)
- r_hat1['targets'][z == 0] = np.nan
-
- psi_a, psi_b = self._score_elements(y, z, d,
- g_hat0['preds'], g_hat1['preds'], m_hat['preds'],
- r_hat0['preds'], r_hat1['preds'], smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_g0': g_hat0['preds'],
- 'ml_g1': g_hat1['preds'],
- 'ml_m': m_hat['preds'],
- 'ml_r0': r_hat0['preds'],
- 'ml_r1': r_hat1['preds']},
- 'targets': {'ml_g0': g_hat0['targets'],
- 'ml_g1': g_hat1['targets'],
- 'ml_m': m_hat['targets'],
- 'ml_r0': r_hat0['targets'],
- 'ml_r1': r_hat1['targets']},
- 'models': {'ml_g0': g_hat0['models'],
- 'ml_g1': g_hat1['models'],
- 'ml_m': m_hat['models'],
- 'ml_r0': r_hat0['models'],
- 'ml_r1': r_hat1['models']}
- }
+ r_hat1["targets"] = r_hat1["targets"].astype(float)
+ r_hat1["targets"][z == 0] = np.nan
+
+ psi_a, psi_b = self._score_elements(
+ y, z, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], r_hat0["preds"], r_hat1["preds"], smpls
+ )
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {
+ "ml_g0": g_hat0["preds"],
+ "ml_g1": g_hat1["preds"],
+ "ml_m": m_hat["preds"],
+ "ml_r0": r_hat0["preds"],
+ "ml_r1": r_hat1["preds"],
+ },
+ "targets": {
+ "ml_g0": g_hat0["targets"],
+ "ml_g1": g_hat1["targets"],
+ "ml_m": m_hat["targets"],
+ "ml_r0": r_hat0["targets"],
+ "ml_r1": r_hat1["targets"],
+ },
+ "models": {
+ "ml_g0": g_hat0["models"],
+ "ml_g1": g_hat1["models"],
+ "ml_m": m_hat["models"],
+ "ml_r0": r_hat0["models"],
+ "ml_r1": r_hat1["models"],
+ },
+ }
return psi_elements, preds
@@ -373,64 +413,111 @@ def _score_elements(self, y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls)
m_hat_adj = m_hat
if isinstance(self.score, str):
- assert self.score == 'LATE'
- psi_b = g_hat1 - g_hat0 \
- + np.divide(np.multiply(z, u_hat1), m_hat_adj) \
- - np.divide(np.multiply(1.0-z, u_hat0), 1.0 - m_hat_adj)
- psi_a = -1*(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat_adj)
- - np.divide(np.multiply(1.0-z, w_hat0), 1.0 - m_hat_adj))
+ assert self.score == "LATE"
+ psi_b = (
+ g_hat1
+ - g_hat0
+ + np.divide(np.multiply(z, u_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - z, u_hat0), 1.0 - m_hat_adj)
+ )
+ psi_a = -1 * (
+ r_hat1
+ - r_hat0
+ + np.divide(np.multiply(z, w_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat_adj)
+ )
else:
assert callable(self.score)
- psi_a, psi_b = self.score(y=y, z=z, d=d,
- g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, r_hat0=r_hat0, r_hat1=r_hat1,
- smpls=smpls)
+ psi_a, psi_b = self.score(
+ y=y, z=z, d=d, g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, r_hat0=r_hat0, r_hat1=r_hat1, smpls=smpls
+ )
return psi_a, psi_b
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, z = check_X_y(x, np.ravel(self._dml_data.z),
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# get train indices for z == 0 and z == 1
smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None,
- 'ml_r': None}
+ scoring_methods = {"ml_g": None, "ml_m": None, "ml_r": None}
train_inds = [train_index for (train_index, _) in smpls]
train_inds_z0 = [train_index for (train_index, _) in smpls_z0]
train_inds_z1 = [train_index for (train_index, _) in smpls_z1]
- g0_tune_res = _dml_tune(y, x, train_inds_z0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- g1_tune_res = _dml_tune(y, x, train_inds_z1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- m_tune_res = _dml_tune(z, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- if self.subgroups['always_takers']:
- r0_tune_res = _dml_tune(d, x, train_inds_z0,
- self._learner['ml_r'], param_grids['ml_r'], scoring_methods['ml_r'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_z0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ g1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_z1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ m_tune_res = _dml_tune(
+ z,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ if self.subgroups["always_takers"]:
+ r0_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds_z0,
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring_methods["ml_r"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
r0_best_params = [xx.best_params_ for xx in r0_tune_res]
else:
r0_tune_res = None
r0_best_params = [None] * len(smpls)
- if self.subgroups['never_takers']:
- r1_tune_res = _dml_tune(d, x, train_inds_z1,
- self._learner['ml_r'], param_grids['ml_r'], scoring_methods['ml_r'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ if self.subgroups["never_takers"]:
+ r1_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds_z1,
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring_methods["ml_r"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
r1_best_params = [xx.best_params_ for xx in r1_tune_res]
else:
r1_tune_res = None
@@ -440,20 +527,23 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
g1_best_params = [xx.best_params_ for xx in g1_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g0': g0_best_params,
- 'ml_g1': g1_best_params,
- 'ml_m': m_best_params,
- 'ml_r0': r0_best_params,
- 'ml_r1': r1_best_params}
-
- tune_res = {'g0_tune': g0_tune_res,
- 'g1_tune': g1_tune_res,
- 'm_tune': m_tune_res,
- 'r0_tune': r0_tune_res,
- 'r1_tune': r1_tune_res}
-
- res = {'params': params,
- 'tune_res': tune_res}
+ params = {
+ "ml_g0": g0_best_params,
+ "ml_g1": g1_best_params,
+ "ml_m": m_best_params,
+ "ml_r0": r0_best_params,
+ "ml_r1": r1_best_params,
+ }
+
+ tune_res = {
+ "g0_tune": g0_tune_res,
+ "g1_tune": g1_tune_res,
+ "m_tune": m_tune_res,
+ "r0_tune": r0_tune_res,
+ "r1_tune": r1_tune_res,
+ }
+
+ res = {"params": params, "tune_res": tune_res}
return res
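
For reference, the `LATE` score elements assembled above reduce to a few NumPy expressions. A sketch, assuming the residual definitions `u_hat = y - g_hat` and `w_hat = d - r_hat` from earlier in `_score_elements`:

```python
import numpy as np

def late_score_elements(y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1):
    # residuals of the outcome and first-stage regressions
    u_hat0, u_hat1 = y - g_hat0, y - g_hat1
    w_hat0, w_hat1 = d - r_hat0, d - r_hat1
    psi_b = g_hat1 - g_hat0 + z * u_hat1 / m_hat - (1.0 - z) * u_hat0 / (1.0 - m_hat)
    psi_a = -1.0 * (r_hat1 - r_hat0 + z * w_hat1 / m_hat - (1.0 - z) * w_hat0 / (1.0 - m_hat))
    return psi_a, psi_b

# with the linear score psi = psi_a * theta + psi_b, the estimate solves
# np.mean(psi_a) * theta_hat + np.mean(psi_b) = 0
```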
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index e5acd45d0..539608726 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -1,19 +1,25 @@
+import warnings
+
import numpy as np
import pandas as pd
-import warnings
from sklearn.utils import check_X_y
from sklearn.utils.multiclass import type_of_target
from ..double_ml import DoubleML
-
-from ..utils.blp import DoubleMLBLP
-from ..utils.policytree import DoubleMLPolicyTree
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw, _cond_targets
-from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, \
- _check_weights, _check_binary_predictions
+from ..utils._checks import (
+ _check_binary_predictions,
+ _check_finite_predictions,
+ _check_integer,
+ _check_is_propensity,
+ _check_score,
+ _check_trimming,
+ _check_weights,
+)
+from ..utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
+from ..utils.blp import DoubleMLBLP
+from ..utils.policytree import DoubleMLPolicyTree
class DoubleMLIRM(LinearScoreMixin, DoubleML):
@@ -113,26 +119,25 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
\\theta_0 = \\mathbb{E}[g_0(1, X) - g_0(0,X) | D=1].
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- n_folds=5,
- n_rep=1,
- score='ATE',
- weights=None,
- normalize_ipw=False,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds=5,
+ n_rep=1,
+ score="ATE",
+ weights=None,
+ normalize_ipw=False,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
- valid_scores = ['ATE', 'ATTE']
+ valid_scores = ["ATE", "ATTE"]
_check_score(self.score, valid_scores, allow_callable=True)
# set stratification for resampling
@@ -140,23 +145,26 @@ def __init__(self,
if draw_sample_splitting:
self.draw_sample_splitting()
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ self._learner = {"ml_g": ml_g, "ml_m": ml_m}
if ml_g_is_classifier:
if obj_dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict", "ml_m": "predict_proba"}
self._initialize_ml_nuisance_params()
self._normalize_ipw = normalize_ipw
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
self._trimming_rule = trimming_rule
self._trimming_threshold = trimming_threshold
_check_trimming(self._trimming_rule, self._trimming_threshold)
@@ -196,138 +204,142 @@ def weights(self):
return self._weights
def _initialize_ml_nuisance_params(self):
- valid_learner = ['ml_g0', 'ml_g1', 'ml_m']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in valid_learner}
+ valid_learner = ["ml_g0", "ml_g1", "ml_m"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _initialize_weights(self, weights):
if weights is None:
weights = np.ones(self._dml_data.n_obs)
if isinstance(weights, np.ndarray):
- self._weights = {'weights': weights}
+ self._weights = {"weights": weights}
else:
assert isinstance(weights, dict)
self._weights = weights
def _get_weights(self, m_hat=None):
# standard case for ATE
- if self.score == 'ATE':
- weights = self._weights['weights']
- if 'weights_bar' not in self._weights.keys():
- weights_bar = self._weights['weights']
+ if self.score == "ATE":
+ weights = self._weights["weights"]
+ if "weights_bar" not in self._weights.keys():
+ weights_bar = self._weights["weights"]
else:
- weights_bar = self._weights['weights_bar'][:, self._i_rep]
+ weights_bar = self._weights["weights_bar"][:, self._i_rep]
else:
# special case for ATTE
- assert self.score == 'ATTE'
+ assert self.score == "ATTE"
assert m_hat is not None
- subgroup = self._weights['weights'] * self._dml_data.d
+ subgroup = self._weights["weights"] * self._dml_data.d
subgroup_probability = np.mean(subgroup)
weights = np.divide(subgroup, subgroup_probability)
- weights_bar = np.divide(
- np.multiply(m_hat, self._weights['weights']),
- subgroup_probability)
+ weights_bar = np.divide(np.multiply(m_hat, self._weights["weights"]), subgroup_probability)
return weights, weights_bar
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM.')
- one_treat = (obj_dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM."
+ )
+ one_treat = obj_dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml_data.d) == "binary"
zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- 'To fit an IRM model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an IRM model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
- g0_external = external_predictions['ml_g0'] is not None
- g1_external = external_predictions['ml_g1'] is not None
- m_external = external_predictions['ml_m'] is not None
+ g0_external = external_predictions["ml_g0"] is not None
+ g1_external = external_predictions["ml_g1"] is not None
+ m_external = external_predictions["ml_m"] is not None
# nuisance g
if g0_external:
# use external predictions
- g_hat0 = {'preds': external_predictions['ml_g0'],
- 'targets': None,
- 'models': None}
+ g_hat0 = {"preds": external_predictions["ml_g0"], "targets": None, "models": None}
else:
- g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls)
- g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0))
+ g_hat0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d0,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls)
+ g_hat0["targets"] = _cond_targets(g_hat0["targets"], cond_sample=(d == 0))
if self._dml_data.binary_outcome:
- _check_binary_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
+ _check_binary_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
if g1_external:
# use external predictions
- g_hat1 = {'preds': external_predictions['ml_g1'],
- 'targets': None,
- 'models': None}
+ g_hat1 = {"preds": external_predictions["ml_g1"], "targets": None, "models": None}
else:
- g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls)
+ g_hat1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls)
# adjust target values to consider only compatible subsamples
- g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1))
+ g_hat1["targets"] = _cond_targets(g_hat1["targets"], cond_sample=(d == 1))
- if self._dml_data.binary_outcome & (self.score != 'ATTE'):
- _check_binary_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', self._dml_data.y_col)
+ if self._dml_data.binary_outcome & (self.score != "ATTE"):
+ _check_binary_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
# nuisance m
if m_external:
# use external predictions
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
# also trim external predictions
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
-
- psi_a, psi_b = self._score_elements(y, d,
- g_hat0['preds'], g_hat1['preds'], m_hat['preds'],
- smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_g0': g_hat0['preds'],
- 'ml_g1': g_hat1['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g0': g_hat0['targets'],
- 'ml_g1': g_hat1['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g0': g_hat0['models'],
- 'ml_g1': g_hat1['models'],
- 'ml_m': m_hat['models']}
- }
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+ psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_g0": g_hat0["preds"], "ml_g1": g_hat1["preds"], "ml_m": m_hat["preds"]},
+ "targets": {"ml_g0": g_hat0["targets"], "ml_g1": g_hat1["targets"], "ml_m": m_hat["targets"]},
+ "models": {"ml_g0": g_hat0["models"], "ml_g1": g_hat1["models"], "ml_m": m_hat["models"]},
+ }
return psi_elements, preds
def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
-
if self.normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, d)
else:
@@ -337,22 +349,19 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
u_hat0 = y - g_hat0
u_hat1 = y - g_hat1
- if (self.score == 'ATE') or (self.score == 'ATTE'):
+ if (self.score == "ATE") or (self.score == "ATTE"):
weights, weights_bar = self._get_weights(m_hat=m_hat_adj)
- psi_b = weights * (g_hat1 - g_hat0) \
- + weights_bar * (
- np.divide(np.multiply(d, u_hat1), m_hat_adj)
- - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj))
- if self.score == 'ATE':
+ psi_b = weights * (g_hat1 - g_hat0) + weights_bar * (
+ np.divide(np.multiply(d, u_hat1), m_hat_adj) - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat_adj)
+ )
+ if self.score == "ATE":
psi_a = np.full_like(m_hat_adj, -1.0)
else:
- assert self.score == 'ATTE'
+ assert self.score == "ATTE"
psi_a = -1.0 * weights
else:
assert callable(self.score)
- psi_a, psi_b = self.score(y=y, d=d,
- g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj,
- smpls=smpls)
+ psi_a, psi_b = self.score(y=y, d=d, g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, smpls=smpls)
return psi_a, psi_b
@@ -361,73 +370,94 @@ def _sensitivity_element_est(self, preds):
y = self._dml_data.y
d = self._dml_data.d
- m_hat = preds['predictions']['ml_m']
- g_hat0 = preds['predictions']['ml_g0']
- g_hat1 = preds['predictions']['ml_g1']
+ m_hat = preds["predictions"]["ml_m"]
+ g_hat0 = preds["predictions"]["ml_g0"]
+ g_hat1 = preds["predictions"]["ml_g1"]
# use weights to make this extendable
weights, weights_bar = self._get_weights(m_hat=m_hat)
- sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0))
+ sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0 - d, g_hat0))
sigma2 = np.mean(sigma2_score_element)
psi_sigma2 = sigma2_score_element - sigma2
# calc m(W,alpha) and Riesz representer
- m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0-m_hat))))
- rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0-d, 1.0-m_hat)))
+ m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0 - m_hat))))
+ rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0 - d, 1.0 - m_hat)))
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2 = np.mean(nu2_score_element)
psi_nu2 = nu2_score_element - nu2
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2,
- 'riesz_rep': rr,
- }
+ element_dict = {
+ "sigma2": sigma2,
+ "nu2": nu2,
+ "psi_sigma2": psi_sigma2,
+ "psi_nu2": psi_nu2,
+ "riesz_rep": rr,
+ }
return element_dict
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_m": None}
train_inds = [train_index for (train_index, _) in smpls]
train_inds_d0 = [train_index for (train_index, _) in smpls_d0]
train_inds_d1 = [train_index for (train_index, _) in smpls_d1]
- g0_tune_res = _dml_tune(y, x, train_inds_d0,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- g1_tune_res = _dml_tune(y, x, train_inds_d1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ g1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g0_best_params = [xx.best_params_ for xx in g0_tune_res]
g1_best_params = [xx.best_params_ for xx in g1_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g0': g0_best_params,
- 'ml_g1': g1_best_params,
- 'ml_m': m_best_params}
- tune_res = {'g0_tune': g0_tune_res,
- 'g1_tune': g1_tune_res,
- 'm_tune': m_tune_res}
+ params = {"ml_g0": g0_best_params, "ml_g1": g1_best_params, "ml_m": m_best_params}
+ tune_res = {"g0_tune": g0_tune_res, "g1_tune": g1_tune_res, "m_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
@@ -453,17 +483,15 @@ def cate(self, basis, is_gate=False, **kwargs):
model : :class:`doubleML.DoubleMLBLP`
Best linear predictor model.
"""
- valid_score = ['ATE']
+ valid_score = ["ATE"]
if self.score not in valid_score:
- raise ValueError('Invalid score ' + self.score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + self.score + ". " + "Valid score " + " or ".join(valid_score) + ".")
if self.n_rep != 1:
- raise NotImplementedError('Only implemented for one repetition. ' +
- f'Number of repetitions is {str(self.n_rep)}.')
+ raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
# define the orthogonal signal
- orth_signal = self.psi_elements['psi_b'].reshape(-1)
+ orth_signal = self.psi_elements["psi_b"].reshape(-1)
# fit the best linear predictor
model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
model.fit(**kwargs)
@@ -489,18 +517,19 @@ def gate(self, groups, **kwargs):
Best linear predictor model for group effects.
"""
if not isinstance(groups, pd.DataFrame):
- raise TypeError('Groups must be of DataFrame type. '
- f'Groups of type {str(type(groups))} was passed.')
+ raise TypeError(f"Groups must be of DataFrame type. Groups of type {str(type(groups))} was passed.")
if not all(groups.dtypes == bool) or all(groups.dtypes == int):
if groups.shape[1] == 1:
- groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
+ groups = pd.get_dummies(groups, prefix="Group", prefix_sep="_")
else:
- raise TypeError('Columns of groups must be of bool type or int type (dummy coded). '
- 'Alternatively, groups should only contain one column.')
+ raise TypeError(
+ "Columns of groups must be of bool type or int type (dummy coded). "
+ "Alternatively, groups should only contain one column."
+ )
if any(groups.sum(0) <= 5):
- warnings.warn('At least one group effect is estimated with less than 6 observations.')
+ warnings.warn("At least one group effect is estimated with less than 6 observations.")
model = self.cate(groups, is_gate=True, **kwargs)
return model
@@ -530,22 +559,19 @@ def policy_tree(self, features, depth=2, **tree_params):
model : :class:`doubleML.DoubleMLPolicyTree`
Policy tree model.
"""
- valid_score = ['ATE']
+ valid_score = ["ATE"]
if self.score not in valid_score:
- raise ValueError('Invalid score ' + self.score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + self.score + ". " + "Valid score " + " or ".join(valid_score) + ".")
if self.n_rep != 1:
- raise NotImplementedError('Only implemented for one repetition. ' +
- f'Number of repetitions is {str(self.n_rep)}.')
+ raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
_check_integer(depth, "Depth", 0)
if not isinstance(features, pd.DataFrame):
- raise TypeError('Covariates must be of DataFrame type. '
- f'Covariates of type {str(type(features))} was passed.')
+ raise TypeError(f"Covariates must be of DataFrame type. Covariates of type {str(type(features))} was passed.")
- orth_signal = self.psi_elements['psi_b'].reshape(-1)
+ orth_signal = self.psi_elements["psi_b"].reshape(-1)
model = DoubleMLPolicyTree(orth_signal, depth=depth, features=features, **tree_params).fit()
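
A minimal usage sketch of the `DoubleMLIRM` signature and the `gate()` method above; `make_irm_data` and the `X1` column name follow the package's data-generator conventions and are assumptions here:

```python
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from doubleml import DoubleMLIRM
from doubleml.datasets import make_irm_data

dml_data = make_irm_data(theta=0.5, n_obs=500, dim_x=10)
ml_g = RandomForestRegressor(n_estimators=100, random_state=42)   # outcome regression
ml_m = RandomForestClassifier(n_estimators=100, random_state=42)  # propensity score

dml_irm = DoubleMLIRM(dml_data, ml_g, ml_m, score="ATE", trimming_threshold=1e-2)
dml_irm.fit()
print(dml_irm.summary)

# group average treatment effects via gate(); a single boolean column is accepted
groups = pd.DataFrame({"Group": dml_data.data["X1"] > 0})
print(dml_irm.gate(groups).confint())
```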
diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py
index 31a522f38..dd912aec2 100644
--- a/doubleml/irm/lpq.py
+++ b/doubleml/irm/lpq.py
@@ -1,25 +1,24 @@
import numpy as np
-from sklearn.utils.multiclass import type_of_target
from sklearn.base import clone
-from sklearn.utils import check_X_y
from sklearn.model_selection import StratifiedKFold, train_test_split
+from sklearn.utils import check_X_y
+from sklearn.utils.multiclass import type_of_target
from ..double_ml import DoubleML
-from ..double_ml_score_mixins import NonLinearScoreMixin
from ..double_ml_data import DoubleMLData
-
+from ..double_ml_score_mixins import NonLinearScoreMixin
+from ..utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment
from ..utils._estimation import (
- _dml_cv_predict,
- _trimm,
- _predict_zero_one_propensity,
_cond_targets,
- _get_bracket_guess,
_default_kde,
- _normalize_ipw,
+ _dml_cv_predict,
_dml_tune,
+ _get_bracket_guess,
+ _normalize_ipw,
+ _predict_zero_one_propensity,
_solve_ipw_score,
+ _trimm,
)
-from ..utils._checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, _check_quantile
class DoubleMLLPQ(NonLinearScoreMixin, DoubleML):
@@ -114,10 +113,7 @@ def __init__(
trimming_threshold=1e-2,
draw_sample_splitting=True,
):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep, score,
- draw_sample_splitting)
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._quantile = quantile
self._treatment = treatment
@@ -125,7 +121,7 @@ def __init__(
self._kde = _default_kde
else:
if not callable(kde):
- raise TypeError("kde should be either a callable or None. " "%r was passed." % kde)
+ raise TypeError("kde should be either a callable or None. %r was passed." % kde)
self._kde = kde
self._normalize_ipw = normalize_ipw
@@ -386,10 +382,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
# preliminary propensity for z
ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold])
- m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1,
- method="predict_proba", smpls=smpls_prelim)[
- "preds"
- ]
+ m_z_hat_prelim = _dml_cv_predict(
+ ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim
+ )["preds"]
m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold)
if self._normalize_ipw:
@@ -515,7 +510,8 @@ def ipw_score(theta):
# this could be adjusted to be compatible with dml1
# estimate final nuisance parameter
comp_prob_hat = np.mean(
- m_d_z1_hat["preds"] - m_d_z0_hat["preds"]
+ m_d_z1_hat["preds"]
+ - m_d_z0_hat["preds"]
+ z / m_z_hat_adj * (d - m_d_z1_hat["preds"])
- (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat["preds"])
)
@@ -664,7 +660,7 @@ def _nuisance_tuning(
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
raise TypeError(
- "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
)
_check_zero_one_treatment(self)
one_instr = obj_dml_data.n_instr == 1
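
The final nuisance parameter above is a doubly robust estimate of the compliance probability; as a stand-alone sketch with hypothetical cross-fitted prediction arrays:

```python
import numpy as np

def compliance_probability(z, d, m_z_hat, m_d_z0_hat, m_d_z1_hat):
    # difference of the instrument-conditional treatment probabilities,
    # plus inverse-probability-weighted correction terms
    return np.mean(
        m_d_z1_hat
        - m_d_z0_hat
        + z / m_z_hat * (d - m_d_z1_hat)
        - (1 - z) / (1 - m_z_hat) * (d - m_d_z0_hat)
    )
```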
diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py
index 4f83f748a..cef152e44 100644
--- a/doubleml/irm/pq.py
+++ b/doubleml/irm/pq.py
@@ -1,30 +1,29 @@
import numpy as np
from sklearn.base import clone
-from sklearn.utils import check_X_y
from sklearn.model_selection import StratifiedKFold, train_test_split
+from sklearn.utils import check_X_y
from ..double_ml import DoubleML
-from ..double_ml_score_mixins import NonLinearScoreMixin
from ..double_ml_data import DoubleMLData
-
+from ..double_ml_score_mixins import NonLinearScoreMixin
+from ..utils._checks import (
+ _check_contains_iv,
+ _check_quantile,
+ _check_score,
+ _check_treatment,
+ _check_trimming,
+ _check_zero_one_treatment,
+)
from ..utils._estimation import (
+ _cond_targets,
+ _default_kde,
_dml_cv_predict,
- _trimm,
- _predict_zero_one_propensity,
+ _dml_tune,
_get_bracket_guess,
- _default_kde,
_normalize_ipw,
- _dml_tune,
+ _predict_zero_one_propensity,
_solve_ipw_score,
- _cond_targets,
-)
-from ..utils._checks import (
- _check_score,
- _check_trimming,
- _check_zero_one_treatment,
- _check_treatment,
- _check_contains_iv,
- _check_quantile,
+ _trimm,
)
@@ -105,25 +104,23 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML):
d 0.553878 0.149858 3.696011 0.000219 0.260161 0.847595
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m,
- treatment=1,
- quantile=0.5,
- n_folds=5,
- n_rep=1,
- score='PQ',
- normalize_ipw=True,
- kde=None,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment=1,
+ quantile=0.5,
+ n_folds=5,
+ n_rep=1,
+ score="PQ",
+ normalize_ipw=True,
+ kde=None,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._quantile = quantile
self._treatment = treatment
@@ -131,8 +128,7 @@ def __init__(self,
self._kde = _default_kde
else:
if not callable(kde):
- raise TypeError("kde should be either a callable or None. "
- "%r was passed." % kde)
+ raise TypeError("kde should be either a callable or None. %r was passed." % kde)
self._kde = kde
self._normalize_ipw = normalize_ipw
@@ -375,14 +371,14 @@ def ipw_score(theta):
m_hat["models"] = fitted_models["ml_m"]
# clip propensities and normalize ipw weights
- m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold)
+ m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
# this is not done in the score to save computation due to multiple score evaluations
# to be able to evaluate the raw models, the m_hat['preds'] are not changed
if self._normalize_ipw:
- m_hat_adj = _normalize_ipw(m_hat['preds'], d)
+ m_hat_adj = _normalize_ipw(m_hat["preds"], d)
else:
- m_hat_adj = m_hat['preds']
+ m_hat_adj = m_hat["preds"]
if self.treatment == 0:
m_hat_adj = 1 - m_hat_adj
@@ -452,7 +448,7 @@ def _nuisance_tuning(
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
raise TypeError(
- "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
)
_check_contains_iv(obj_dml_data)
_check_zero_one_treatment(self)
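
For orientation, the trimming and IPW-normalization step commented on in the `pq.py` hunk above boils down to clip-and-rescale. A minimal numpy sketch, assuming a simple Hájek-style rescaling; the package helpers `_trimm` and `_normalize_ipw` may differ in detail:

```python
import numpy as np

rng = np.random.default_rng(0)
d = rng.binomial(1, 0.4, size=1000)           # binary treatment indicator
m_hat = rng.uniform(0.001, 0.999, size=1000)  # raw propensity estimates

# truncation rule: clip the propensity away from 0 and 1
trimming_threshold = 1e-2
m_trimmed = np.clip(m_hat, trimming_threshold, 1 - trimming_threshold)

# Hajek-style normalization (illustrative only): rescale the
# inverse-probability weights so they average to one
w = d / m_trimmed
w_normalized = w / w.mean()
```
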
diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py
index 2a212d77d..786635ba7 100644
--- a/doubleml/irm/qte.py
+++ b/doubleml/irm/qte.py
@@ -1,21 +1,17 @@
import numpy as np
import pandas as pd
-
-from sklearn.base import clone
-
from joblib import Parallel, delayed
+from sklearn.base import clone
-from ..double_ml_data import DoubleMLData, DoubleMLClusterData
-from .pq import DoubleMLPQ
-from .lpq import DoubleMLLPQ
-from .cvar import DoubleMLCVAR
+from ..double_ml_data import DoubleMLClusterData, DoubleMLData
from ..double_ml_framework import concat
-
+from ..utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_zero_one_treatment
+from ..utils._descriptive import generate_summary
from ..utils._estimation import _default_kde
from ..utils.resampling import DoubleMLResampling
-from ..utils._checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_sample_splitting
-
-from ..utils._descriptive import generate_summary
+from .cvar import DoubleMLCVAR
+from .lpq import DoubleMLLPQ
+from .pq import DoubleMLPQ
class DoubleMLQTE:
@@ -90,22 +86,24 @@ class DoubleMLQTE:
0.50 0.449150 0.192539 2.332782 0.019660 0.071782 0.826519
0.75 0.709606 0.193308 3.670867 0.000242 0.330731 1.088482
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m=None,
- quantiles=0.5,
- n_folds=5,
- n_rep=1,
- score='PQ',
- normalize_ipw=True,
- kde=None,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m=None,
+ quantiles=0.5,
+ n_folds=5,
+ n_rep=1,
+ score="PQ",
+ normalize_ipw=True,
+ kde=None,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
self._dml_data = obj_dml_data
- self._quantiles = np.asarray(quantiles).reshape((-1, ))
+ self._quantiles = np.asarray(quantiles).reshape((-1,))
self._check_quantile()
self._n_quantiles = len(self._quantiles)
@@ -113,8 +111,7 @@ def __init__(self,
self._kde = _default_kde
else:
if not callable(kde):
- raise TypeError('kde should be either a callable or None. '
- '%r was passed.' % kde)
+ raise TypeError("kde should be either a callable or None. %r was passed." % kde)
self._kde = kde
self._normalize_ipw = normalize_ipw
@@ -123,7 +120,7 @@ def __init__(self,
# check score
self._score = score
- valid_scores = ['PQ', 'LPQ', 'CVaR']
+ valid_scores = ["PQ", "LPQ", "CVaR"]
_check_score(self.score, valid_scores, allow_callable=False)
# check data
@@ -141,11 +138,12 @@ def __init__(self,
_check_trimming(self._trimming_rule, self._trimming_threshold)
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
- self._learner = {'ml_g': clone(ml_g), 'ml_m': clone(ml_m)}
- self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'}
+ self._learner = {"ml_g": clone(ml_g), "ml_m": clone(ml_m)}
+ self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"}
# perform sample splitting
self._smpls = None
@@ -156,10 +154,9 @@ def __init__(self,
def __str__(self):
class_name = self.__class__.__name__
- header = f'================== {class_name} Object ==================\n'
+ header = f"================== {class_name} Object ==================\n"
fit_summary = str(self.summary)
- res = header + \
- '\n------------------ Fit summary ------------------\n' + fit_summary
+ res = header + "\n------------------ Fit summary ------------------\n" + fit_summary
return res
@property
@@ -204,8 +201,10 @@ def smpls(self):
The partition used for cross-fitting.
"""
if self._smpls is None:
- err_msg = ('Sample splitting not specified. Either draw samples via .draw_sample splitting() ' +
- 'or set external samples via .set_sample_splitting().')
+ err_msg = (
+ "Sample splitting not specified. Either draw samples via .draw_sample splitting() "
+ + "or set external samples via .set_sample_splitting()."
+ )
raise ValueError(err_msg)
return self._smpls
@@ -358,12 +357,11 @@ def summary(self):
A summary for the estimated causal effect after calling :meth:`fit`.
"""
if self.framework is None:
- col_names = ['coef', 'std err', 't', 'P>|t|']
+ col_names = ["coef", "std err", "t", "P>|t|"]
df_summary = pd.DataFrame(columns=col_names)
else:
ci = self.confint()
- df_summary = generate_summary(self.coef, self.se, self.t_stat,
- self.pval, ci, self.quantiles)
+ df_summary = generate_summary(self.coef, self.se, self.t_stat, self.pval, ci, self.quantiles)
return df_summary
def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None):
@@ -399,9 +397,11 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.")
# parallel estimation of the quantiles
- parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs')
- fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models)
- for i_quant in range(self.n_quantiles))
+ parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch="2*n_jobs")
+ fitted_models = parallel(
+ delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models)
+ for i_quant in range(self.n_quantiles)
+ )
# combine the estimates and scores
framework_list = [None] * self.n_quantiles
@@ -412,15 +412,14 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_
self._modellist_1[i_quant] = fitted_models[i_quant][1]
# set up the framework
- framework_list[i_quant] = self._modellist_1[i_quant].framework - \
- self._modellist_0[i_quant].framework
+ framework_list[i_quant] = self._modellist_1[i_quant].framework - self._modellist_0[i_quant].framework
# aggregate all frameworks
self._framework = concat(framework_list)
return self
- def bootstrap(self, method='normal', n_rep_boot=500):
+ def bootstrap(self, method="normal", n_rep_boot=500):
"""
Multiplier bootstrap for DoubleML models.
@@ -438,7 +437,7 @@ def bootstrap(self, method='normal', n_rep_boot=500):
self : object
"""
if self._framework is None:
- raise ValueError('Apply fit() before bootstrap().')
+ raise ValueError("Apply fit() before bootstrap().")
self._framework.bootstrap(method=method, n_rep_boot=n_rep_boot)
return self
@@ -454,10 +453,9 @@ def draw_sample_splitting(self):
-------
self : object
"""
- obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds,
- n_rep=self.n_rep,
- n_obs=self._dml_data.n_obs,
- stratify=self._dml_data.d)
+ obj_dml_resampling = DoubleMLResampling(
+ n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._dml_data.n_obs, stratify=self._dml_data.d
+ )
self._smpls = obj_dml_resampling.split_samples()
# initialize all models
self._modellist_0, self._modellist_1 = self._initialize_models()
@@ -523,7 +521,8 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
>>> dml_plr_obj.set_sample_splitting(smpls)
"""
self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
- all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data)
+ all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data
+ )
# initialize all models
self._modellist_0, self._modellist_1 = self._initialize_models()
@@ -551,14 +550,14 @@ def confint(self, joint=False, level=0.95):
"""
if self.framework is None:
- raise ValueError('Apply fit() before confint().')
+ raise ValueError("Apply fit() before confint().")
df_ci = self.framework.confint(joint=joint, level=level)
df_ci.set_index(pd.Index(self._quantiles), inplace=True)
return df_ci
- def p_adjust(self, method='romano-wolf'):
+ def p_adjust(self, method="romano-wolf"):
"""
Multiple testing adjustment for DoubleML models.
@@ -577,7 +576,7 @@ def p_adjust(self, method='romano-wolf'):
"""
if self.framework is None:
- raise ValueError('Apply fit() before p_adjust().')
+ raise ValueError("Apply fit() before p_adjust().")
p_val, _ = self.framework.p_adjust(method=method)
p_val.set_index(pd.Index(self._quantiles), inplace=True)
@@ -585,7 +584,6 @@ def p_adjust(self, method='romano-wolf'):
return p_val
def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_models=False):
-
model_0 = self.modellist_0[i_quant]
model_1 = self.modellist_1[i_quant]
@@ -596,59 +594,42 @@ def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_m
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
_check_zero_one_treatment(self)
return
def _check_quantile(self):
if np.any(self.quantiles <= 0) | np.any(self.quantiles >= 1):
- raise ValueError('Quantiles have be between 0 or 1. ' +
- f'Quantiles {str(self.quantiles)} passed.')
+ raise ValueError("Quantiles have be between 0 or 1. " + f"Quantiles {str(self.quantiles)} passed.")
def _initialize_models(self):
modellist_0 = [None] * self.n_quantiles
modellist_1 = [None] * self.n_quantiles
kwargs = {
- 'obj_dml_data': self._dml_data,
- 'ml_g': self._learner['ml_g'],
- 'ml_m': self._learner['ml_m'],
- 'n_folds': self.n_folds,
- 'n_rep': self.n_rep,
- 'trimming_rule': self.trimming_rule,
- 'trimming_threshold': self.trimming_threshold,
- 'normalize_ipw': self.normalize_ipw,
- 'draw_sample_splitting': False
+ "obj_dml_data": self._dml_data,
+ "ml_g": self._learner["ml_g"],
+ "ml_m": self._learner["ml_m"],
+ "n_folds": self.n_folds,
+ "n_rep": self.n_rep,
+ "trimming_rule": self.trimming_rule,
+ "trimming_threshold": self.trimming_threshold,
+ "normalize_ipw": self.normalize_ipw,
+ "draw_sample_splitting": False,
}
for i_quant in range(self.n_quantiles):
-
# initialize models for both potential quantiles
- if self.score == 'PQ':
- model_0 = DoubleMLPQ(quantile=self._quantiles[i_quant],
- treatment=0,
- kde=self.kde,
- **kwargs)
- model_1 = DoubleMLPQ(quantile=self._quantiles[i_quant],
- treatment=1,
- kde=self.kde,
- **kwargs)
- elif self.score == 'LPQ':
- model_0 = DoubleMLLPQ(quantile=self._quantiles[i_quant],
- treatment=0,
- kde=self.kde,
- **kwargs)
- model_1 = DoubleMLLPQ(quantile=self._quantiles[i_quant],
- treatment=1,
- kde=self.kde,
- **kwargs)
-
- elif self.score == 'CVaR':
- model_0 = DoubleMLCVAR(quantile=self._quantiles[i_quant],
- treatment=0,
- **kwargs)
- model_1 = DoubleMLCVAR(quantile=self._quantiles[i_quant],
- treatment=1,
- **kwargs)
+ if self.score == "PQ":
+ model_0 = DoubleMLPQ(quantile=self._quantiles[i_quant], treatment=0, kde=self.kde, **kwargs)
+ model_1 = DoubleMLPQ(quantile=self._quantiles[i_quant], treatment=1, kde=self.kde, **kwargs)
+ elif self.score == "LPQ":
+ model_0 = DoubleMLLPQ(quantile=self._quantiles[i_quant], treatment=0, kde=self.kde, **kwargs)
+ model_1 = DoubleMLLPQ(quantile=self._quantiles[i_quant], treatment=1, kde=self.kde, **kwargs)
+
+ elif self.score == "CVaR":
+ model_0 = DoubleMLCVAR(quantile=self._quantiles[i_quant], treatment=0, **kwargs)
+ model_1 = DoubleMLCVAR(quantile=self._quantiles[i_quant], treatment=1, **kwargs)
# synchronize the sample splitting
model_0.set_sample_splitting(all_smpls=self.smpls)
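
A usage sketch for the reformatted `DoubleMLQTE` class, assuming `make_irm_data` with its default `DoubleMLData` return type; the learners and parameter values are illustrative:

```python
from sklearn.ensemble import RandomForestClassifier

import doubleml as dml
from doubleml.datasets import make_irm_data

dml_data = make_irm_data(n_obs=1000, dim_x=5, theta=0.5)

# the PQ score estimates both nuisances via predict_proba,
# so both learners have to be classifiers
ml_g = RandomForestClassifier(n_estimators=100, max_depth=5)
ml_m = RandomForestClassifier(n_estimators=100, max_depth=5)

dml_qte = dml.DoubleMLQTE(dml_data, ml_g, ml_m, quantiles=[0.25, 0.5, 0.75], score="PQ")
dml_qte.fit(n_jobs_models=3)                   # one PQ model pair per quantile, fitted in parallel
print(dml_qte.summary)                         # coef, std err, t, P>|t| per quantile
dml_qte.bootstrap(method="normal", n_rep_boot=500)
print(dml_qte.confint(joint=True))             # joint CIs require the multiplier bootstrap
print(dml_qte.p_adjust(method="romano-wolf"))  # as does the Romano-Wolf adjustment
```
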
diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py
index c93116c17..c63d2076f 100644
--- a/doubleml/irm/ssm.py
+++ b/doubleml/irm/ssm.py
@@ -1,23 +1,16 @@
-from sklearn.utils import check_X_y
-from sklearn.base import clone
-from sklearn.model_selection import train_test_split
-import numpy as np
import copy
import warnings
+import numpy as np
+from sklearn.base import clone
+from sklearn.model_selection import train_test_split
+from sklearn.utils import check_X_y
+
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
-from ..utils._estimation import (
- _trimm,
- _dml_cv_predict,
- _dml_tune,
- _get_cond_smpls_2d,
- _predict_zero_one_propensity)
-from ..utils._checks import (
- _check_finite_predictions,
- _check_trimming,
- _check_score)
from ..double_ml_score_mixins import LinearScoreMixin
+from ..utils._checks import _check_finite_predictions, _check_score, _check_trimming
+from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity, _trimm
class DoubleMLSSM(LinearScoreMixin, DoubleML):
@@ -105,23 +98,21 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
Potential outcomes Y(0) and Y(1) are estimated and ATE is returned as E[Y(1) - Y(0)].
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_pi,
- ml_m,
- n_folds=5,
- n_rep=1,
- score='missing-at-random',
- normalize_ipw=False,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_pi,
+ ml_m,
+ n_folds=5,
+ n_rep=1,
+ score="missing-at-random",
+ normalize_ipw=False,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._external_predictions_implemented = False
self._sensitivity_implemented = False
@@ -132,37 +123,38 @@ def __init__(self,
_check_trimming(self._trimming_rule, self._trimming_threshold)
self._check_data(self._dml_data)
- _check_score(self.score, ['missing-at-random', 'nonignorable'])
+ _check_score(self.score, ["missing-at-random", "nonignorable"])
# for both score functions, stratification by d and s is viable
self._strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.s.reshape(-1, 1)
if draw_sample_splitting:
self.draw_sample_splitting()
- ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _ = self._check_learner(ml_pi, 'ml_pi', regressor=False, classifier=True)
- _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
-
- self._learner = {'ml_g': clone(ml_g),
- 'ml_pi': clone(ml_pi),
- 'ml_m': clone(ml_m),
- }
- self._predict_method = {'ml_g': 'predict',
- 'ml_pi': 'predict_proba',
- 'ml_m': 'predict_proba'
- }
+ ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _ = self._check_learner(ml_pi, "ml_pi", regressor=False, classifier=True)
+ _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+
+ self._learner = {
+ "ml_g": clone(ml_g),
+ "ml_pi": clone(ml_pi),
+ "ml_m": clone(ml_m),
+ }
+ self._predict_method = {"ml_g": "predict", "ml_pi": "predict_proba", "ml_m": "predict_proba"}
if ml_g_is_classifier:
if self._dml_data._check_binary_outcome():
- self._predict_method['ml_g'] = 'predict_proba'
+ self._predict_method["ml_g"] = "predict_proba"
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome is not binary with values 0 and 1."
+ )
self._initialize_ml_nuisance_params()
if not isinstance(self.normalize_ipw, bool):
- raise TypeError('Normalization indicator has to be boolean. ' +
- f'Object of type {str(type(self.normalize_ipw))} passed.')
+ raise TypeError(
+ "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
+ )
@property
def normalize_ipw(self):
@@ -186,24 +178,26 @@ def trimming_threshold(self):
return self._trimming_threshold
def _initialize_ml_nuisance_params(self):
- valid_learner = ['ml_g_d0', 'ml_g_d1',
- 'ml_pi', 'ml_m']
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in
- valid_learner}
+ valid_learner = ["ml_g_d0", "ml_g_d1", "ml_pi", "ml_m"]
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
- if obj_dml_data.z_cols is not None and self._score == 'missing-at-random':
- warnings.warn(' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'You are estimating the effect under the assumption of data missing at random. \
- Instrumental variables will not be used in estimation.')
- if obj_dml_data.z_cols is None and self._score == 'nonignorable':
- raise ValueError('Sample selection by nonignorable nonresponse was set but instrumental variable \
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
+ if obj_dml_data.z_cols is not None and self._score == "missing-at-random":
+ warnings.warn(
+ " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "You are estimating the effect under the assumption of data missing at random. \
+ Instrumental variables will not be used in estimation."
+ )
+ if obj_dml_data.z_cols is None and self._score == "nonignorable":
+ raise ValueError(
+ "Sample selection by nonignorable nonresponse was set but instrumental variable \
is None. To estimate treatment effect under nonignorable nonresponse, \
- specify an instrument for the selection variable.')
+ specify an instrument for the selection variable."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
@@ -211,7 +205,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, s = check_X_y(x, self._dml_data.s, force_all_finite=False)
- if self._score == 'nonignorable':
+ if self._score == "nonignorable":
z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False)
dx = np.column_stack((x, d, z))
else:
@@ -219,49 +213,75 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
_, smpls_d0_s1, _, smpls_d1_s1 = _get_cond_smpls_2d(smpls, d, s)
- if self._score == 'missing-at-random':
- pi_hat = _dml_cv_predict(self._learner['ml_pi'], dx, s, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_pi'), method=self._predict_method['ml_pi'],
- return_models=return_models)
- pi_hat['targets'] = pi_hat['targets'].astype(float)
- _check_finite_predictions(pi_hat['preds'], self._learner['ml_pi'], 'ml_pi', smpls)
+ if self._score == "missing-at-random":
+ pi_hat = _dml_cv_predict(
+ self._learner["ml_pi"],
+ dx,
+ s,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_pi"),
+ method=self._predict_method["ml_pi"],
+ return_models=return_models,
+ )
+ pi_hat["targets"] = pi_hat["targets"].astype(float)
+ _check_finite_predictions(pi_hat["preds"], self._learner["ml_pi"], "ml_pi", smpls)
# propensity score m
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- m_hat['targets'] = m_hat['targets'].astype(float)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ m_hat["targets"] = m_hat["targets"].astype(float)
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
# conditional outcome
- g_hat_d1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1_s1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d1'), method=self._predict_method['ml_g'],
- return_models=return_models)
- g_hat_d1['targets'] = g_hat_d1['targets'].astype(float)
- _check_finite_predictions(g_hat_d1['preds'], self._learner['ml_g'], 'ml_g_d1', smpls)
-
- g_hat_d0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0_s1, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g_d0'), method=self._predict_method['ml_g'],
- return_models=return_models)
- g_hat_d0['targets'] = g_hat_d0['targets'].astype(float)
- _check_finite_predictions(g_hat_d0['preds'], self._learner['ml_g'], 'ml_g_d0', smpls)
+ g_hat_d1 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d1_s1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d1"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ g_hat_d1["targets"] = g_hat_d1["targets"].astype(float)
+ _check_finite_predictions(g_hat_d1["preds"], self._learner["ml_g"], "ml_g_d1", smpls)
+
+ g_hat_d0 = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y,
+ smpls=smpls_d0_s1,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g_d0"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ g_hat_d0["targets"] = g_hat_d0["targets"].astype(float)
+ _check_finite_predictions(g_hat_d0["preds"], self._learner["ml_g"], "ml_g_d0", smpls)
else:
- assert self._score == 'nonignorable'
+ assert self._score == "nonignorable"
# initialize nuisance predictions, targets and models
- g_hat_d1 = {'models': None,
- 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
- 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan)
- }
+ g_hat_d1 = {
+ "models": None,
+ "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan),
+ }
g_hat_d0 = copy.deepcopy(g_hat_d1)
pi_hat = copy.deepcopy(g_hat_d1)
m_hat = copy.deepcopy(g_hat_d1)
# pi_hat - used for preliminary estimation of propensity score pi, overwritten in each iteration
- pi_hat_prelim = {'models': None,
- 'targets': [],
- 'preds': []
- }
+ pi_hat_prelim = {"models": None, "targets": [], "preds": []}
# initialize models
fitted_models = {}
@@ -269,8 +289,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
# set nuisance model parameters
est_params = self._get_params(learner)
- if learner == 'ml_g_d1' or learner == 'ml_g_d0':
- nuisance = 'ml_g'
+ if learner == "ml_g_d1" or learner == "ml_g_d0":
+ nuisance = "ml_g"
else:
nuisance = learner
@@ -298,90 +318,91 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
dx_train_1 = dx[train_inds_1, :]
# fit propensity score for selection on first part of training set
- fitted_models['ml_pi'][i_fold].fit(dx_train_1, s_train_1)
- pi_hat_prelim['preds'] = _predict_zero_one_propensity(fitted_models['ml_pi'][i_fold], dx)
- pi_hat_prelim['targets'] = s
+ fitted_models["ml_pi"][i_fold].fit(dx_train_1, s_train_1)
+ pi_hat_prelim["preds"] = _predict_zero_one_propensity(fitted_models["ml_pi"][i_fold], dx)
+ pi_hat_prelim["targets"] = s
# predictions for small pi in denominator
- pi_hat['preds'][test_inds] = pi_hat_prelim['preds'][test_inds]
- pi_hat['targets'][test_inds] = s[test_inds]
+ pi_hat["preds"][test_inds] = pi_hat_prelim["preds"][test_inds]
+ pi_hat["targets"][test_inds] = s[test_inds]
# add predicted selection propensity to covariates
- xpi = np.column_stack((x, pi_hat_prelim['preds']))
+ xpi = np.column_stack((x, pi_hat_prelim["preds"]))
# estimate propensity score m using the second training sample
xpi_train_2 = xpi[train_inds_2, :]
d_train_2 = d[train_inds_2]
xpi_test = xpi[test_inds, :]
- fitted_models['ml_m'][i_fold].fit(xpi_train_2, d_train_2)
+ fitted_models["ml_m"][i_fold].fit(xpi_train_2, d_train_2)
- m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], xpi_test)
- m_hat['targets'][test_inds] = d[test_inds]
+ m_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m"][i_fold], xpi_test)
+ m_hat["targets"][test_inds] = d[test_inds]
# estimate conditional outcome g on second training sample - treatment
- s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0],
- np.intersect1d(np.where(s == 1)[0], train_inds_2))
+ s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2))
xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :]
y_s1_d1_train_2 = y[s1_d1_train_2_indices]
- fitted_models['ml_g_d1'][i_fold].fit(xpi_s1_d1_train_2, y_s1_d1_train_2)
+ fitted_models["ml_g_d1"][i_fold].fit(xpi_s1_d1_train_2, y_s1_d1_train_2)
# predict conditional outcome
- g_hat_d1['preds'][test_inds] = fitted_models['ml_g_d1'][i_fold].predict(xpi_test)
- g_hat_d1['targets'][test_inds] = y[test_inds]
+ g_hat_d1["preds"][test_inds] = fitted_models["ml_g_d1"][i_fold].predict(xpi_test)
+ g_hat_d1["targets"][test_inds] = y[test_inds]
# estimate conditional outcome on second training sample - control
- s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0],
- np.intersect1d(np.where(s == 1)[0], train_inds_2))
+ s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2))
xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :]
y_s1_d0_train_2 = y[s1_d0_train_2_indices]
- fitted_models['ml_g_d0'][i_fold].fit(xpi_s1_d0_train_2, y_s1_d0_train_2)
+ fitted_models["ml_g_d0"][i_fold].fit(xpi_s1_d0_train_2, y_s1_d0_train_2)
# predict conditional outcome
- g_hat_d0['preds'][test_inds] = fitted_models['ml_g_d0'][i_fold].predict(xpi_test)
- g_hat_d0['targets'][test_inds] = y[test_inds]
+ g_hat_d0["preds"][test_inds] = fitted_models["ml_g_d0"][i_fold].predict(xpi_test)
+ g_hat_d0["targets"][test_inds] = y[test_inds]
if return_models:
- g_hat_d1['models'] = fitted_models['ml_g_d1']
- g_hat_d0['models'] = fitted_models['ml_g_d0']
- pi_hat['models'] = fitted_models['ml_pi']
- m_hat['models'] = fitted_models['ml_m']
+ g_hat_d1["models"] = fitted_models["ml_g_d1"]
+ g_hat_d0["models"] = fitted_models["ml_g_d0"]
+ pi_hat["models"] = fitted_models["ml_pi"]
+ m_hat["models"] = fitted_models["ml_m"]
- m_hat['preds'] = _trimm(m_hat['preds'], self._trimming_rule, self._trimming_threshold)
+ m_hat["preds"] = _trimm(m_hat["preds"], self._trimming_rule, self._trimming_threshold)
# treatment indicator
- dtreat = (d == 1)
- dcontrol = (d == 0)
-
- psi_a, psi_b = self._score_elements(dtreat, dcontrol, g_hat_d1['preds'],
- g_hat_d0['preds'],
- pi_hat['preds'],
- m_hat['preds'],
- s, y)
-
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
-
- preds = {'predictions': {'ml_g_d0': g_hat_d0['preds'],
- 'ml_g_d1': g_hat_d1['preds'],
- 'ml_pi': pi_hat['preds'],
- 'ml_m': m_hat['preds']},
- 'targets': {'ml_g_d0': g_hat_d0['targets'],
- 'ml_g_d1': g_hat_d1['targets'],
- 'ml_pi': pi_hat['targets'],
- 'ml_m': m_hat['targets']},
- 'models': {'ml_g_d0': g_hat_d0['models'],
- 'ml_g_d1': g_hat_d1['models'],
- 'ml_pi': pi_hat['models'],
- 'ml_m': m_hat['models']}
- }
+ dtreat = d == 1
+ dcontrol = d == 0
+
+ psi_a, psi_b = self._score_elements(
+ dtreat, dcontrol, g_hat_d1["preds"], g_hat_d0["preds"], pi_hat["preds"], m_hat["preds"], s, y
+ )
+
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+
+ preds = {
+ "predictions": {
+ "ml_g_d0": g_hat_d0["preds"],
+ "ml_g_d1": g_hat_d1["preds"],
+ "ml_pi": pi_hat["preds"],
+ "ml_m": m_hat["preds"],
+ },
+ "targets": {
+ "ml_g_d0": g_hat_d0["targets"],
+ "ml_g_d1": g_hat_d1["targets"],
+ "ml_pi": pi_hat["targets"],
+ "ml_m": m_hat["targets"],
+ },
+ "models": {
+ "ml_g_d0": g_hat_d0["models"],
+ "ml_g_d1": g_hat_d1["models"],
+ "ml_pi": pi_hat["models"],
+ "ml_m": m_hat["models"],
+ },
+ }
return psi_elements, preds
- def _score_elements(self, dtreat, dcontrol, g_d1, g_d0,
- pi, m, s, y):
+ def _score_elements(self, dtreat, dcontrol, g_d1, g_d0, pi, m, s, y):
# psi_a
psi_a = -1
@@ -401,25 +422,22 @@ def _score_elements(self, dtreat, dcontrol, g_d1, g_d0,
return psi_a, psi_b
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# time indicator is used for selection (selection not available in DoubleMLData yet)
x, s = check_X_y(x, self._dml_data.s, force_all_finite=False)
- if self._score == 'nonignorable':
+ if self._score == "nonignorable":
z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False)
dx = np.column_stack((x, d, z))
else:
dx = np.column_stack((x, d))
if scoring_methods is None:
- scoring_methods = {'ml_g': None,
- 'ml_pi': None,
- 'ml_m': None}
+ scoring_methods = {"ml_g": None, "ml_pi": None, "ml_m": None}
# nuisance training sets conditional on d
_, smpls_d0_s1, _, smpls_d1_s1 = _get_cond_smpls_2d(smpls, d, s)
@@ -428,36 +446,65 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
train_inds_d1_s1 = [train_index for (train_index, _) in smpls_d1_s1]
# hyperparameter tuning for ML
- g_d0_tune_res = _dml_tune(y, x, train_inds_d0_s1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- g_d1_tune_res = _dml_tune(y, x, train_inds_d1_s1,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- pi_tune_res = _dml_tune(s, dx, train_inds,
- self._learner['ml_pi'], param_grids['ml_pi'], scoring_methods['ml_pi'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g_d0_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d0_s1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ g_d1_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds_d1_s1,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ pi_tune_res = _dml_tune(
+ s,
+ dx,
+ train_inds,
+ self._learner["ml_pi"],
+ param_grids["ml_pi"],
+ scoring_methods["ml_pi"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g_d0_best_params = [xx.best_params_ for xx in g_d0_tune_res]
g_d1_best_params = [xx.best_params_ for xx in g_d1_tune_res]
pi_best_params = [xx.best_params_ for xx in pi_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_g_d0': g_d0_best_params,
- 'ml_g_d1': g_d1_best_params,
- 'ml_pi': pi_best_params,
- 'ml_m': m_best_params}
+ params = {"ml_g_d0": g_d0_best_params, "ml_g_d1": g_d1_best_params, "ml_pi": pi_best_params, "ml_m": m_best_params}
- tune_res = {'g_d0_tune': g_d0_tune_res,
- 'g_d1_tune': g_d1_tune_res,
- 'pi_tune': pi_tune_res,
- 'm_tune': m_tune_res}
+ tune_res = {"g_d0_tune": g_d0_tune_res, "g_d1_tune": g_d1_tune_res, "pi_tune": pi_tune_res, "m_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
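
The `nonignorable` branch above implements a nested cross-fitting scheme: the selection propensity pi is fitted on one half of the training folds using (x, d, z), its predictions are appended to the covariates, and the treatment propensity and outcome regressions are then fitted on the other half. A single-split reduction of that loop, with simulated data and sklearn learners standing in for the user-supplied ones:

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(42)
n = 500
x = rng.normal(size=(n, 3))
d = rng.binomial(1, 0.5, n)                    # treatment
z = rng.normal(size=n)                         # instrument for selection
s = rng.binomial(1, 0.7, n)                    # selection indicator
y = np.where(s == 1, x @ np.ones(3) + d + rng.normal(size=n), np.nan)

train, test = train_test_split(np.arange(n), test_size=0.3, random_state=0)
train_1, train_2 = train_test_split(train, test_size=0.5, random_state=0)

# step 1: preliminary selection propensity pi(S=1 | x, d, z) on the first half
dx = np.column_stack((x, d, z))
pi_hat = LogisticRegression().fit(dx[train_1], s[train_1]).predict_proba(dx)[:, 1]

# step 2: treatment propensity on the second half, with pi_hat as extra covariate
xpi = np.column_stack((x, pi_hat))
m_hat = LogisticRegression().fit(xpi[train_2], d[train_2]).predict_proba(xpi[test])[:, 1]

# step 3: outcome regression on the selected, treated units of the second half
s1_d1 = train_2[(s[train_2] == 1) & (d[train_2] == 1)]
g_hat_d1 = LinearRegression().fit(xpi[s1_d1], y[s1_d1]).predict(xpi[test])
```
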
diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py
index e22f80ffe..3b74051f3 100644
--- a/doubleml/irm/tests/_utils_apo_manual.py
+++ b/doubleml/irm/tests/_utils_apo_manual.py
@@ -1,19 +1,30 @@
import numpy as np
from sklearn.base import clone, is_classifier
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
-
-from ...utils._estimation import _normalize_ipw
+from ...tests._utils_boot import boot_manual, draw_weights
from ...utils._checks import _check_is_propensity
+from ...utils._estimation import _normalize_ipw
-def fit_apo(y, x, d,
- learner_g, learner_m, treatment_level, all_smpls, score,
- n_rep=1, g0_params=None, g1_params=None, m_params=None,
- normalize_ipw=False, trimming_threshold=1e-2):
+def fit_apo(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ treatment_level,
+ all_smpls,
+ score,
+ n_rep=1,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ normalize_ipw=False,
+ trimming_threshold=1e-2,
+):
n_obs = len(y)
- treated = (d == treatment_level)
+ treated = d == treatment_level
thetas = np.zeros(n_rep)
ses = np.zeros(n_rep)
@@ -23,65 +34,84 @@ def fit_apo(y, x, d,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat0, g_hat1, m_hat = fit_nuisance_apo(y, x, d, treated,
- learner_g, learner_m, smpls, score,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params,
- trimming_threshold=trimming_threshold)
+ g_hat0, g_hat1, m_hat = fit_nuisance_apo(
+ y,
+ x,
+ d,
+ treated,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ trimming_threshold=trimming_threshold,
+ )
all_g_hat0.append(g_hat0)
all_g_hat1.append(g_hat1)
all_m_hat.append(m_hat)
- thetas[i_rep], ses[i_rep] = apo_dml2(y, x, d, treated,
- g_hat0, g_hat1, m_hat,
- smpls, score, normalize_ipw)
+ thetas[i_rep], ses[i_rep] = apo_dml2(y, x, d, treated, g_hat0, g_hat1, m_hat, smpls, score, normalize_ipw)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_hat0': all_g_hat0, 'all_g_hat1': all_g_hat1, 'all_m_hat': all_m_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_hat0": all_g_hat0,
+ "all_g_hat1": all_g_hat1,
+ "all_m_hat": all_m_hat,
+ }
return res
-def fit_nuisance_apo(y, x, d, treated,
- learner_g, learner_m, smpls, score,
- g0_params=None, g1_params=None, m_params=None,
- trimming_threshold=1e-12):
+def fit_nuisance_apo(
+ y,
+ x,
+ d,
+ treated,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ trimming_threshold=1e-12,
+):
ml_g0 = clone(learner_g)
ml_g1 = clone(learner_g)
train_cond0 = np.where(treated == 0)[0]
if is_classifier(learner_g):
- g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
else:
- g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
train_cond1 = np.where(treated == 1)[0]
if is_classifier(learner_g):
- g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
else:
- g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
ml_m = clone(learner_m)
- m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls,
- trimming_threshold=trimming_threshold)
+ m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
return g_hat0_list, g_hat1_list, m_hat_list
def compute_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, smpls):
- u_hat0 = np.full_like(y, np.nan, dtype='float64')
- u_hat1 = np.full_like(y, np.nan, dtype='float64')
- g_hat0 = np.full_like(y, np.nan, dtype='float64')
- g_hat1 = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
+ u_hat0 = np.full_like(y, np.nan, dtype="float64")
+ u_hat1 = np.full_like(y, np.nan, dtype="float64")
+ g_hat0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat1 = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
u_hat0[test_index] = y[test_index] - g_hat0_list[idx]
u_hat1[test_index] = y[test_index] - g_hat1_list[idx]
@@ -89,28 +119,22 @@ def compute_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, smpls):
g_hat1[test_index] = g_hat1_list[idx]
m_hat[test_index] = m_hat_list[idx]
- _check_is_propensity(m_hat, 'learner_m', 'ml_m', smpls, eps=1e-12)
+ _check_is_propensity(m_hat, "learner_m", "ml_m", smpls, eps=1e-12)
return u_hat0, u_hat1, g_hat0, g_hat1, m_hat
def apo_dml2(y, x, d, treated, g_hat0_list, g_hat1_list, m_hat_list, smpls, score, normalize_ipw):
n_obs = len(y)
- u_hat0, u_hat1, g_hat0, g_hat1, m_hat = compute_residuals(
- y, g_hat0_list, g_hat1_list, m_hat_list, smpls
- )
+ u_hat0, u_hat1, g_hat0, g_hat1, m_hat = compute_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, smpls)
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, treated)
else:
m_hat_adj = m_hat
- theta_hat = apo_orth(g_hat0, g_hat1, m_hat_adj,
- u_hat0, u_hat1, treated, score)
+ theta_hat = apo_orth(g_hat0, g_hat1, m_hat_adj, u_hat0, u_hat1, treated, score)
- se = np.sqrt(var_apo(theta_hat, g_hat0, g_hat1,
- m_hat_adj,
- u_hat0, u_hat1,
- treated, score, n_obs))
+ se = np.sqrt(var_apo(theta_hat, g_hat0, g_hat1, m_hat_adj, u_hat0, u_hat1, treated, score, n_obs))
return theta_hat, se
@@ -121,14 +145,27 @@ def apo_orth(g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score):
def var_apo(theta, g_hat0, g_hat1, m_hat, u_hat0, u_hat1, treated, score, n_obs):
- var = 1/n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat) - theta, 2))
+ var = 1 / n_obs * np.mean(np.power(g_hat1 + np.divide(np.multiply(treated, u_hat1), m_hat) - theta, 2))
return var
-def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, normalize_ipw=True):
- treated = (d == treatment_level)
+def boot_apo(
+ y,
+ d,
+ treatment_level,
+ thetas,
+ ses,
+ all_g_hat0,
+ all_g_hat1,
+ all_m_hat,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ normalize_ipw=True,
+):
+ treated = d == treatment_level
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -136,9 +173,20 @@ def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_h
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_apo_single_split(
- thetas[i_rep], y, d, treated,
- all_g_hat0[i_rep], all_g_hat1[i_rep], all_m_hat[i_rep], smpls,
- score, ses[i_rep], weights, n_rep_boot, normalize_ipw)
+ thetas[i_rep],
+ y,
+ d,
+ treated,
+ all_g_hat0[i_rep],
+ all_g_hat1[i_rep],
+ all_m_hat[i_rep],
+ smpls,
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ normalize_ipw,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -146,10 +194,10 @@ def boot_apo(y, d, treatment_level, thetas, ses, all_g_hat0, all_g_hat1, all_m_h
return boot_t_stat
-def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_list,
- smpls, score, se, weights, n_rep_boot, normalize_ipw):
- _, u_hat1, _, g_hat1, m_hat = compute_residuals(
- y, g_hat0_list, g_hat1_list, m_hat_list, smpls)
+def boot_apo_single_split(
+ theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_list, smpls, score, se, weights, n_rep_boot, normalize_ipw
+):
+ _, u_hat1, _, g_hat1, m_hat = compute_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, smpls)
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, treated)
@@ -166,7 +214,7 @@ def boot_apo_single_split(theta, y, d, treated, g_hat0_list, g_hat1_list, m_hat_
def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, score, n_rep):
n_treat = 1
n_obs = len(y)
- treated = (d == treatment_level)
+ treated = d == treatment_level
sigma2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan)
nu2 = np.full(shape=(1, n_rep, n_treat), fill_value=np.nan)
@@ -174,15 +222,14 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan)
for i_rep in range(n_rep):
-
- m_hat = predictions['ml_m'][:, i_rep, 0]
- g_hat0 = predictions['ml_g0'][:, i_rep, 0]
- g_hat1 = predictions['ml_g1'][:, i_rep, 0]
+ m_hat = predictions["ml_m"][:, i_rep, 0]
+ g_hat0 = predictions["ml_g0"][:, i_rep, 0]
+ g_hat1 = predictions["ml_g1"][:, i_rep, 0]
weights = np.ones_like(d)
weights_bar = np.ones_like(d)
- sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0-treated, g_hat0))
+ sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0 - treated, g_hat0))
sigma2[0, i_rep, 0] = np.mean(sigma2_score_element)
psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
@@ -194,24 +241,18 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
nu2[0, i_rep, 0] = np.mean(nu2_score_element)
psi_nu2[:, i_rep, 0] = nu2_score_element - nu2[0, i_rep, 0]
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2}
+ element_dict = {"sigma2": sigma2, "nu2": nu2, "psi_sigma2": psi_sigma2, "psi_nu2": psi_nu2}
return element_dict
-def tune_nuisance_apo(y, x, d, treatment_level, ml_g, ml_m, smpls, score, n_folds_tune,
- param_grid_g, param_grid_m):
+def tune_nuisance_apo(y, x, d, treatment_level, ml_g, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_m):
train_cond0 = np.where(d != treatment_level)[0]
- g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond0)
+ g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
train_cond1 = np.where(d == treatment_level)[0]
- g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond1)
+ g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond1)
- treated = (d == treatment_level)
+ treated = d == treatment_level
m_tune_res = tune_grid_search(treated, x, ml_m, smpls, param_grid_m, n_folds_tune)
g0_best_params = [xx.best_params_ for xx in g0_tune_res]
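
The aggregation over repeated sample splits used in `fit_apo` (and throughout these test utilities) is the median rule: the point estimate is the median over repetitions, and the variance gets an additive correction for the spread of the per-repetition estimates around that median. In isolation, with illustrative numbers:

```python
import numpy as np

# per-repetition estimates and standard errors from repeated cross-fitting
thetas = np.array([0.51, 0.48, 0.55, 0.50, 0.47])
ses = np.array([0.10, 0.11, 0.09, 0.10, 0.12])
n_obs = 1000

theta = np.median(thetas)
# the correction term penalizes repetitions whose estimate is far from the median
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
```
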
diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py
index cf47d6450..efc5eea13 100644
--- a/doubleml/irm/tests/_utils_apos_manual.py
+++ b/doubleml/irm/tests/_utils_apos_manual.py
@@ -1,16 +1,25 @@
import numpy as np
from sklearn.base import clone
-from ..apo import DoubleMLAPO
from ...double_ml_data import DoubleMLData
-
from ...tests._utils_boot import draw_weights
+from ..apo import DoubleMLAPO
-def fit_apos(y, x, d,
- learner_g, learner_m, treatment_levels, all_smpls, score,
- n_rep=1, trimming_rule='truncate',
- normalize_ipw=False, trimming_threshold=1e-2):
+def fit_apos(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ treatment_levels,
+ all_smpls,
+ score,
+ n_rep=1,
+ trimming_rule="truncate",
+ normalize_ipw=False,
+ trimming_threshold=1e-2,
+):
n_obs = len(y)
n_treatments = len(treatment_levels)
n_folds = len(all_smpls[0])
@@ -33,7 +42,7 @@ def fit_apos(y, x, d,
trimming_rule=trimming_rule,
trimming_threshold=trimming_threshold,
normalize_ipw=normalize_ipw,
- draw_sample_splitting=False
+ draw_sample_splitting=False,
)
# synchronize the sample splitting
@@ -52,12 +61,11 @@ def fit_apos(y, x, d,
apos = np.median(all_apos, axis=1)
se = np.zeros(n_treatments)
for i_level in range(n_treatments):
- se[i_level] = np.sqrt(np.median(np.power(all_se[i_level, :], 2) * n_obs +
- np.power(all_apos[i_level, :] - all_apos[i_level], 2)) / n_obs)
+ se[i_level] = np.sqrt(
+ np.median(np.power(all_se[i_level, :], 2) * n_obs + np.power(all_apos[i_level, :] - all_apos[i_level], 2)) / n_obs
+ )
- res = {'apos': apos, 'se': se,
- 'all_apos': all_apos, 'all_se': all_se,
- 'apo_scaled_score': apo_scaled_score}
+ res = {"apos": apos, "se": se, "all_apos": all_apos, "all_se": all_se, "apo_scaled_score": apo_scaled_score}
return res
@@ -68,7 +76,8 @@ def boot_apos(scaled_scores, ses, treatment_levels, all_smpls, n_rep, bootstrap,
n_obs = scaled_scores.shape[0]
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
for i_treatment_levels in range(n_treatment_levels):
- boot_t_stat[:, i_treatment_levels, i_rep] = np.matmul(weights, scaled_scores[:, i_treatment_levels, i_rep]) / \
- (n_obs * ses[i_treatment_levels, i_rep])
+ boot_t_stat[:, i_treatment_levels, i_rep] = np.matmul(weights, scaled_scores[:, i_treatment_levels, i_rep]) / (
+ n_obs * ses[i_treatment_levels, i_rep]
+ )
return boot_t_stat
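
The reformatted `boot_apos` computes multiplier-bootstrap t-statistics: random weights are drawn per bootstrap repetition and multiplied into the scaled scores. A self-contained sketch, assuming standard normal multipliers (the `"normal"` weight choice) and a single coefficient:

```python
import numpy as np

rng = np.random.default_rng(0)
n_obs, n_rep_boot = 500, 1000

scaled_scores = rng.normal(size=n_obs)          # influence-function values, mean approx. 0
se = scaled_scores.std() / np.sqrt(n_obs)

weights = rng.normal(size=(n_rep_boot, n_obs))  # one weight vector per bootstrap draw
boot_t_stat = weights @ scaled_scores / (n_obs * se)

# bootstrap critical value at the 95% level
crit = np.quantile(np.abs(boot_t_stat), 0.95)
```
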
diff --git a/doubleml/irm/tests/_utils_cvar_manual.py b/doubleml/irm/tests/_utils_cvar_manual.py
index 34f072201..7ff2c2306 100644
--- a/doubleml/irm/tests/_utils_cvar_manual.py
+++ b/doubleml/irm/tests/_utils_cvar_manual.py
@@ -1,14 +1,26 @@
import numpy as np
from sklearn.base import clone
-from sklearn.model_selection import train_test_split, StratifiedKFold
+from sklearn.model_selection import StratifiedKFold, train_test_split
from ...tests._utils import fit_predict_proba, tune_grid_search
-from ...utils._estimation import _dml_cv_predict, _normalize_ipw, _get_bracket_guess, _solve_ipw_score
-
-
-def fit_cvar(y, x, d, quantile,
- learner_g, learner_m, all_smpls, treatment, normalize_ipw=True, n_rep=1,
- trimming_threshold=1e-2, g_params=None, m_params=None):
+from ...utils._estimation import _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score
+
+
+def fit_cvar(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g,
+ learner_m,
+ all_smpls,
+ treatment,
+ normalize_ipw=True,
+ n_rep=1,
+ trimming_threshold=1e-2,
+ g_params=None,
+ m_params=None,
+):
n_obs = len(y)
cvars = np.zeros(n_rep)
@@ -17,25 +29,34 @@ def fit_cvar(y, x, d, quantile,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat, m_hat, ipw_est = fit_nuisance_cvar(y, x, d, quantile,
- learner_g, learner_m, smpls, treatment,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- g_params=g_params, m_params=m_params)
+ g_hat, m_hat, ipw_est = fit_nuisance_cvar(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g,
+ learner_m,
+ smpls,
+ treatment,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ g_params=g_params,
+ m_params=m_params,
+ )
cvars[i_rep], ses[i_rep] = cvar_dml2(y, d, g_hat, m_hat, treatment, quantile, ipw_est)
cvar = np.median(cvars)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(cvars - cvar, 2)) / n_obs)
- res = {'pq': cvar, 'se': se,
- 'pqs': cvars, 'ses': ses}
+ res = {"pq": cvar, "se": se, "pqs": cvars, "ses": ses}
return res
-def fit_nuisance_cvar(y, x, d, quantile, learner_g, learner_m, smpls, treatment,
- normalize_ipw, trimming_threshold, g_params, m_params):
+def fit_nuisance_cvar(
+ y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, trimming_threshold, g_params, m_params
+):
n_folds = len(smpls)
n_obs = len(y)
coef_bounds = (y.min(), y.max())
@@ -63,26 +84,24 @@ def fit_nuisance_cvar(y, x, d, quantile, learner_g, learner_m, smpls, treatment,
test_inds = smpls[i_fold][1]
# start nested crossfitting
- train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
- random_state=42, stratify=d[train_inds])
- smpls_prelim = [(train, test) for train, test in
- StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=d[train_inds_1])]
+ train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, random_state=42, stratify=d[train_inds])
+ smpls_prelim = [
+ (train, test) for train, test in StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=d[train_inds_1])
+ ]
d_train_1 = d[train_inds_1]
y_train_1 = y[train_inds_1]
x_train_1 = x[train_inds_1, :]
# todo change prediction method
- m_hat_prelim_list = fit_predict_proba(d_train_1, x_train_1, ml_m,
- params=None,
- trimming_threshold=trimming_threshold,
- smpls=smpls_prelim)
+ m_hat_prelim_list = fit_predict_proba(
+ d_train_1, x_train_1, ml_m, params=None, trimming_threshold=trimming_threshold, smpls=smpls_prelim
+ )
- m_hat_prelim = np.full_like(y_train_1, np.nan, dtype='float64')
+ m_hat_prelim = np.full_like(y_train_1, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls_prelim):
m_hat_prelim[test_index] = m_hat_prelim_list[idx]
- m_hat_prelim = _dml_cv_predict(ml_m, x_train_1, d_train_1,
- method='predict_proba', smpls=smpls_prelim)['preds']
+ m_hat_prelim = _dml_cv_predict(ml_m, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"]
m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold
m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold
@@ -168,16 +187,14 @@ def cvar_var_est(coef, g_hat, m_hat, d, y, treatment, quantile, ipw_est, n_obs):
return var_est
-def tune_nuisance_cvar(y, x, d, ml_g, ml_m, smpls, treatment, quantile, n_folds_tune,
- param_grid_g, param_grid_m):
+def tune_nuisance_cvar(y, x, d, ml_g, ml_m, smpls, treatment, quantile, n_folds_tune, param_grid_g, param_grid_m):
train_cond_treat = np.where(d == treatment)[0]
quantile_approx = np.quantile(y[d == treatment], quantile)
g_target_1 = np.ones_like(y) * quantile_approx
g_target_2 = (y - quantile * quantile_approx) / (1 - quantile)
g_target_approx = np.max(np.column_stack((g_target_1, g_target_2)), 1)
- g_tune_res = tune_grid_search(g_target_approx, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond_treat)
+ g_tune_res = tune_grid_search(g_target_approx, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond_treat)
m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
g_best_params = [xx.best_params_ for xx in g_tune_res]
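
The pseudo-target built in `tune_nuisance_cvar` comes from the Rockafellar-Uryasev representation: for q equal to the tau-quantile, E[max(q, (Y - tau*q)/(1 - tau))] equals the conditional value at risk at level tau, so the element-wise maximum is a sensible regression target for tuning `ml_g`. A numerical check:

```python
import numpy as np

rng = np.random.default_rng(1)
y = rng.normal(size=100_000)
tau = 0.75

q_tau = np.quantile(y, tau)  # plug-in approximation of the tau-quantile

g_target = np.maximum(q_tau, (y - tau * q_tau) / (1 - tau))
# both quantities approximate CVaR_tau = E[Y | Y >= q_tau]
print(g_target.mean(), y[y >= q_tau].mean())
```
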
diff --git a/doubleml/irm/tests/_utils_iivm_manual.py b/doubleml/irm/tests/_utils_iivm_manual.py
index d3ebde2cf..a358d6f81 100644
--- a/doubleml/irm/tests/_utils_iivm_manual.py
+++ b/doubleml/irm/tests/_utils_iivm_manual.py
@@ -1,16 +1,32 @@
import numpy as np
from sklearn.base import clone, is_classifier
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
-
+from ...tests._utils_boot import boot_manual, draw_weights
from ...utils._estimation import _normalize_ipw
-def fit_iivm(y, x, d, z,
- learner_g, learner_m, learner_r, all_smpls, score,
- n_rep=1, g0_params=None, g1_params=None, m_params=None, r0_params=None, r1_params=None,
- normalize_ipw=True, trimming_threshold=1e-2, always_takers=True, never_takers=True):
+def fit_iivm(
+ y,
+ x,
+ d,
+ z,
+ learner_g,
+ learner_m,
+ learner_r,
+ all_smpls,
+ score,
+ n_rep=1,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ r0_params=None,
+ r1_params=None,
+ normalize_ipw=True,
+ trimming_threshold=1e-2,
+ always_takers=True,
+ never_takers=True,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -24,10 +40,23 @@ def fit_iivm(y, x, d, z,
smpls = all_smpls[i_rep]
g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = fit_nuisance_iivm(
- y, x, d, z,
- learner_g, learner_m, learner_r, smpls,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params, r0_params=r0_params, r1_params=r1_params,
- trimming_threshold=trimming_threshold, always_takers=always_takers, never_takers=never_takers)
+ y,
+ x,
+ d,
+ z,
+ learner_g,
+ learner_m,
+ learner_r,
+ smpls,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ r0_params=r0_params,
+ r1_params=r1_params,
+ trimming_threshold=trimming_threshold,
+ always_takers=always_takers,
+ never_takers=never_takers,
+ )
all_g_hat0.append(g_hat0)
all_g_hat1.append(g_hat1)
@@ -35,90 +64,112 @@ def fit_iivm(y, x, d, z,
all_r_hat0.append(r_hat0)
all_r_hat1.append(r_hat1)
- thetas[i_rep], ses[i_rep] = iivm_dml2(y, x, d, z,
- g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
- smpls, score, normalize_ipw)
+ thetas[i_rep], ses[i_rep] = iivm_dml2(y, x, d, z, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls, score, normalize_ipw)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_hat0': all_g_hat0, 'all_g_hat1': all_g_hat1,
- 'all_m_hat': all_m_hat, 'all_r_hat0': all_r_hat0, 'all_r_hat1': all_r_hat1}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_hat0": all_g_hat0,
+ "all_g_hat1": all_g_hat1,
+ "all_m_hat": all_m_hat,
+ "all_r_hat0": all_r_hat0,
+ "all_r_hat1": all_r_hat1,
+ }
return res
-def fit_nuisance_iivm(y, x, d, z, learner_g, learner_m, learner_r, smpls,
- g0_params=None, g1_params=None, m_params=None, r0_params=None, r1_params=None,
- trimming_threshold=1e-12, always_takers=True, never_takers=True):
+def fit_nuisance_iivm(
+ y,
+ x,
+ d,
+ z,
+ learner_g,
+ learner_m,
+ learner_r,
+ smpls,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ r0_params=None,
+ r1_params=None,
+ trimming_threshold=1e-12,
+ always_takers=True,
+ never_takers=True,
+):
ml_g0 = clone(learner_g)
train_cond0 = np.where(z == 0)[0]
if is_classifier(learner_g):
- g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
else:
- g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
ml_g1 = clone(learner_g)
train_cond1 = np.where(z == 1)[0]
if is_classifier(learner_g):
- g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
else:
- g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
ml_m = clone(learner_m)
- m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls,
- trimming_threshold=trimming_threshold)
+ m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
ml_r0 = clone(learner_r)
if always_takers:
- r_hat0_list = fit_predict_proba(d, x, ml_r0, r0_params, smpls,
- train_cond=train_cond0)
+ r_hat0_list = fit_predict_proba(d, x, ml_r0, r0_params, smpls, train_cond=train_cond0)
else:
r_hat0_list = []
- for (_, test_index) in smpls:
+ for _, test_index in smpls:
r_hat0_list.append(np.zeros_like(d[test_index]))
ml_r1 = clone(learner_r)
if never_takers:
- r_hat1_list = fit_predict_proba(d, x, ml_r1, r1_params, smpls,
- train_cond=train_cond1)
+ r_hat1_list = fit_predict_proba(d, x, ml_r1, r1_params, smpls, train_cond=train_cond1)
else:
r_hat1_list = []
- for (_, test_index) in smpls:
+ for _, test_index in smpls:
r_hat1_list.append(np.ones_like(d[test_index]))
return g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list
-def tune_nuisance_iivm(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tune,
- param_grid_g, param_grid_m, param_grid_r,
- always_takers=True, never_takers=True):
+def tune_nuisance_iivm(
+ y,
+ x,
+ d,
+ z,
+ ml_g,
+ ml_m,
+ ml_r,
+ smpls,
+ n_folds_tune,
+ param_grid_g,
+ param_grid_m,
+ param_grid_r,
+ always_takers=True,
+ never_takers=True,
+):
train_cond0 = np.where(z == 0)[0]
- g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond0)
+ g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
train_cond1 = np.where(z == 1)[0]
- g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond1)
+ g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond1)
m_tune_res = tune_grid_search(z, x, ml_m, smpls, param_grid_m, n_folds_tune)
if always_takers:
- r0_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune,
- train_cond=train_cond0)
+ r0_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune, train_cond=train_cond0)
r0_best_params = [xx.best_params_ for xx in r0_tune_res]
else:
r0_best_params = None
if never_takers:
- r1_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune,
- train_cond=train_cond1)
+ r1_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune, train_cond=train_cond1)
r1_best_params = [xx.best_params_ for xx in r1_tune_res]
else:
r1_best_params = None
@@ -131,15 +182,15 @@ def tune_nuisance_iivm(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tune,
def compute_iivm_residuals(y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls):
- u_hat0 = np.full_like(y, np.nan, dtype='float64')
- u_hat1 = np.full_like(y, np.nan, dtype='float64')
- w_hat0 = np.full_like(y, np.nan, dtype='float64')
- w_hat1 = np.full_like(y, np.nan, dtype='float64')
- g_hat0 = np.full_like(y, np.nan, dtype='float64')
- g_hat1 = np.full_like(y, np.nan, dtype='float64')
- r_hat0 = np.full_like(y, np.nan, dtype='float64')
- r_hat1 = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
+ u_hat0 = np.full_like(y, np.nan, dtype="float64")
+ u_hat1 = np.full_like(y, np.nan, dtype="float64")
+ w_hat0 = np.full_like(y, np.nan, dtype="float64")
+ w_hat1 = np.full_like(y, np.nan, dtype="float64")
+ g_hat0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat1 = np.full_like(y, np.nan, dtype="float64")
+ r_hat0 = np.full_like(y, np.nan, dtype="float64")
+ r_hat1 = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
u_hat0[test_index] = y[test_index] - g_hat0_list[idx]
u_hat1[test_index] = y[test_index] - g_hat1_list[idx]
@@ -157,53 +208,87 @@ def compute_iivm_residuals(y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_li
def iivm_dml2(y, x, d, z, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls, score, normalize_ipw):
n_obs = len(y)
u_hat0, u_hat1, w_hat0, w_hat1, g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = compute_iivm_residuals(
- y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls)
+ y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls
+ )
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, d)
else:
m_hat_adj = m_hat
- theta_hat = iivm_orth(g_hat0, g_hat1, m_hat_adj, r_hat0, r_hat1,
- u_hat0, u_hat1, w_hat0, w_hat1, z, score)
- se = np.sqrt(var_iivm(theta_hat, g_hat0, g_hat1,
- m_hat_adj, r_hat0, r_hat1,
- u_hat0, u_hat1, w_hat0, w_hat1,
- z, score, n_obs))
+ theta_hat = iivm_orth(g_hat0, g_hat1, m_hat_adj, r_hat0, r_hat1, u_hat0, u_hat1, w_hat0, w_hat1, z, score)
+ se = np.sqrt(
+ var_iivm(theta_hat, g_hat0, g_hat1, m_hat_adj, r_hat0, r_hat1, u_hat0, u_hat1, w_hat0, w_hat1, z, score, n_obs)
+ )
return theta_hat, se
def var_iivm(theta, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, u_hat0, u_hat1, w_hat0, w_hat1, z, score, n_obs):
- assert score == 'LATE'
- var = 1/n_obs * np.mean(np.power(g_hat1 - g_hat0
- + np.divide(np.multiply(z, u_hat1), m_hat)
- - np.divide(np.multiply(1.-z, u_hat0), 1.-m_hat)
- - theta*(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat)
- - np.divide(np.multiply(1.-z, w_hat0), 1.-m_hat)), 2)) \
- / np.power(np.mean(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat)
- - np.divide(np.multiply(1.-z, w_hat0), 1.-m_hat)), 2)
+ assert score == "LATE"
+ var = (
+ 1
+ / n_obs
+ * np.mean(
+ np.power(
+ g_hat1
+ - g_hat0
+ + np.divide(np.multiply(z, u_hat1), m_hat)
+ - np.divide(np.multiply(1.0 - z, u_hat0), 1.0 - m_hat)
+ - theta
+ * (
+ r_hat1
+ - r_hat0
+ + np.divide(np.multiply(z, w_hat1), m_hat)
+ - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat)
+ ),
+ 2,
+ )
+ )
+ / np.power(
+ np.mean(
+ r_hat1
+ - r_hat0
+ + np.divide(np.multiply(z, w_hat1), m_hat)
+ - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat)
+ ),
+ 2,
+ )
+ )
return var
def iivm_orth(g_hat0, g_hat1, m_hat, r_hat0, r_hat1, u_hat0, u_hat1, w_hat0, w_hat1, z, score):
- assert score == 'LATE'
- res = np.mean(g_hat1 - g_hat0
- + np.divide(np.multiply(z, u_hat1), m_hat)
- - np.divide(np.multiply(1.-z, u_hat0), 1.-m_hat)) \
- / np.mean(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat)
- - np.divide(np.multiply(1.-z, w_hat0), 1.-m_hat))
+ assert score == "LATE"
+ res = np.mean(
+ g_hat1 - g_hat0 + np.divide(np.multiply(z, u_hat1), m_hat) - np.divide(np.multiply(1.0 - z, u_hat0), 1.0 - m_hat)
+ ) / np.mean(
+ r_hat1 - r_hat0 + np.divide(np.multiply(z, w_hat1), m_hat) - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat)
+ )
return res
-def boot_iivm(y, d, z, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_r_hat0, all_r_hat1,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, apply_cross_fitting=True, normalize_ipw=True):
+def boot_iivm(
+ y,
+ d,
+ z,
+ thetas,
+ ses,
+ all_g_hat0,
+ all_g_hat1,
+ all_m_hat,
+ all_r_hat0,
+ all_r_hat1,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ apply_cross_fitting=True,
+ normalize_ipw=True,
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -214,9 +299,23 @@ def boot_iivm(y, d, z, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_r_hat
n_obs = len(test_index)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_iivm_single_split(
- thetas[i_rep], y, d, z,
- all_g_hat0[i_rep], all_g_hat1[i_rep], all_m_hat[i_rep], all_r_hat0[i_rep], all_r_hat1[i_rep],
- smpls, score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting, normalize_ipw)
+ thetas[i_rep],
+ y,
+ d,
+ z,
+ all_g_hat0[i_rep],
+ all_g_hat1[i_rep],
+ all_m_hat[i_rep],
+ all_r_hat0[i_rep],
+ all_r_hat1[i_rep],
+ smpls,
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ normalize_ipw,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -224,11 +323,28 @@ def boot_iivm(y, d, z, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_r_hat
return boot_t_stat
-def boot_iivm_single_split(theta, y, d, z, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list,
- smpls, score, se, weights, n_rep, apply_cross_fitting, normalize_ipw):
- assert score == 'LATE'
+def boot_iivm_single_split(
+ theta,
+ y,
+ d,
+ z,
+ g_hat0_list,
+ g_hat1_list,
+ m_hat_list,
+ r_hat0_list,
+ r_hat1_list,
+ smpls,
+ score,
+ se,
+ weights,
+ n_rep,
+ apply_cross_fitting,
+ normalize_ipw,
+):
+ assert score == "LATE"
u_hat0, u_hat1, w_hat0, w_hat1, g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = compute_iivm_residuals(
- y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls)
+ y, d, g_hat0_list, g_hat1_list, m_hat_list, r_hat0_list, r_hat1_list, smpls
+ )
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, d)
@@ -236,22 +352,38 @@ def boot_iivm_single_split(theta, y, d, z, g_hat0_list, g_hat1_list, m_hat_list,
m_hat_adj = m_hat
if apply_cross_fitting:
- J = np.mean(-(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat_adj)
- - np.divide(np.multiply(1. - z, w_hat0), 1. - m_hat_adj)))
+ J = np.mean(
+ -(
+ r_hat1
+ - r_hat0
+ + np.divide(np.multiply(z, w_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat_adj)
+ )
+ )
else:
test_index = smpls[0][1]
- J = np.mean(-(r_hat1[test_index] - r_hat0[test_index]
- + np.divide(np.multiply(z[test_index], w_hat1[test_index]), m_hat_adj[test_index])
- - np.divide(np.multiply(1. - z[test_index], w_hat0[test_index]),
- 1. - m_hat_adj[test_index])))
-
- psi = g_hat1 - g_hat0 \
- + np.divide(np.multiply(z, u_hat1), m_hat_adj) \
- - np.divide(np.multiply(1.-z, u_hat0), 1.-m_hat_adj) \
- - theta*(r_hat1 - r_hat0
- + np.divide(np.multiply(z, w_hat1), m_hat_adj)
- - np.divide(np.multiply(1.-z, w_hat0), 1.-m_hat_adj))
+ J = np.mean(
+ -(
+ r_hat1[test_index]
+ - r_hat0[test_index]
+ + np.divide(np.multiply(z[test_index], w_hat1[test_index]), m_hat_adj[test_index])
+ - np.divide(np.multiply(1.0 - z[test_index], w_hat0[test_index]), 1.0 - m_hat_adj[test_index])
+ )
+ )
+
+ psi = (
+ g_hat1
+ - g_hat0
+ + np.divide(np.multiply(z, u_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - z, u_hat0), 1.0 - m_hat_adj)
+ - theta
+ * (
+ r_hat1
+ - r_hat0
+ + np.divide(np.multiply(z, w_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - z, w_hat0), 1.0 - m_hat_adj)
+ )
+ )
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting)
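
Every hunk in `_utils_iivm_manual.py` above is behaviour-preserving reformatting (black line breaks plus single-to-double quotes), so the estimator can be cross-checked against either side of the diff. Reconstructed from `iivm_orth` — with `u_hat_k = y - g_hat_k` as assigned in `compute_iivm_residuals` and, by the same pattern, `w_hat_k = d - r_hat_k` — it computes the standard LATE moment ratio (the notation just mirrors the variable names in the code, not any external reference):

```latex
\hat{\theta}_{\text{LATE}}
  = \frac{\tfrac{1}{n}\sum_{i=1}^{n}\Bigl[\hat g_1(x_i) - \hat g_0(x_i)
      + \tfrac{z_i\,(y_i - \hat g_1(x_i))}{\hat m(x_i)}
      - \tfrac{(1 - z_i)\,(y_i - \hat g_0(x_i))}{1 - \hat m(x_i)}\Bigr]}
    {\tfrac{1}{n}\sum_{i=1}^{n}\Bigl[\hat r_1(x_i) - \hat r_0(x_i)
      + \tfrac{z_i\,(d_i - \hat r_1(x_i))}{\hat m(x_i)}
      - \tfrac{(1 - z_i)\,(d_i - \hat r_0(x_i))}{1 - \hat m(x_i)}\Bigr]}
```

`var_iivm` is the matching sandwich variance: the mean of the squared numerator score, evaluated at the estimate, divided by the squared denominator mean and scaled by 1/n.
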
diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py
index 5fbdd174c..27f1a8390 100644
--- a/doubleml/irm/tests/_utils_irm_manual.py
+++ b/doubleml/irm/tests/_utils_irm_manual.py
@@ -1,17 +1,27 @@
import numpy as np
from sklearn.base import clone, is_classifier
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
-
-from ...utils._estimation import _normalize_ipw
+from ...tests._utils_boot import boot_manual, draw_weights
from ...utils._checks import _check_is_propensity
+from ...utils._estimation import _normalize_ipw
-def fit_irm(y, x, d,
- learner_g, learner_m, all_smpls, score,
- n_rep=1, g0_params=None, g1_params=None, m_params=None,
- normalize_ipw=True, trimming_threshold=1e-2):
+def fit_irm(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ all_smpls,
+ score,
+ n_rep=1,
+ g0_params=None,
+ g1_params=None,
+ m_params=None,
+ normalize_ipw=True,
+ trimming_threshold=1e-2,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -23,55 +33,63 @@ def fit_irm(y, x, d,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat0, g_hat1, m_hat, p_hat = fit_nuisance_irm(y, x, d,
- learner_g, learner_m, smpls,
- score,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params,
- trimming_threshold=trimming_threshold)
+ g_hat0, g_hat1, m_hat, p_hat = fit_nuisance_irm(
+ y,
+ x,
+ d,
+ learner_g,
+ learner_m,
+ smpls,
+ score,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ trimming_threshold=trimming_threshold,
+ )
all_g_hat0.append(g_hat0)
all_g_hat1.append(g_hat1)
all_m_hat.append(m_hat)
all_p_hat.append(p_hat)
- thetas[i_rep], ses[i_rep] = irm_dml2(y, x, d,
- g_hat0, g_hat1, m_hat, p_hat,
- smpls, score, normalize_ipw)
+ thetas[i_rep], ses[i_rep] = irm_dml2(y, x, d, g_hat0, g_hat1, m_hat, p_hat, smpls, score, normalize_ipw)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_hat0': all_g_hat0, 'all_g_hat1': all_g_hat1, 'all_m_hat': all_m_hat, 'all_p_hat': all_p_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_hat0": all_g_hat0,
+ "all_g_hat1": all_g_hat1,
+ "all_m_hat": all_m_hat,
+ "all_p_hat": all_p_hat,
+ }
return res
-def fit_nuisance_irm(y, x, d, learner_g, learner_m, smpls, score,
- g0_params=None, g1_params=None, m_params=None,
- trimming_threshold=1e-12):
+def fit_nuisance_irm(
+ y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12
+):
ml_g0 = clone(learner_g)
ml_g1 = clone(learner_g)
train_cond0 = np.where(d == 0)[0]
if is_classifier(learner_g):
- g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
else:
- g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls,
- train_cond=train_cond0)
+ g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
train_cond1 = np.where(d == 1)[0]
if is_classifier(learner_g):
- g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict_proba(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
else:
- g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls,
- train_cond=train_cond1)
+ g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
ml_m = clone(learner_m)
- m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls,
- trimming_threshold=trimming_threshold)
+ m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
p_hat_list = []
for _ in smpls:
@@ -80,15 +98,12 @@ def fit_nuisance_irm(y, x, d, learner_g, learner_m, smpls, score,
return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list
-def tune_nuisance_irm(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune,
- param_grid_g, param_grid_m):
+def tune_nuisance_irm(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_m):
train_cond0 = np.where(d == 0)[0]
- g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond0)
+ g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
train_cond1 = np.where(d == 1)[0]
- g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond1)
+ g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond1)
m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
@@ -100,12 +115,12 @@ def tune_nuisance_irm(y, x, d, ml_g, ml_m, smpls, score, n_folds_tune,
def compute_iivm_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls):
- u_hat0 = np.full_like(y, np.nan, dtype='float64')
- u_hat1 = np.full_like(y, np.nan, dtype='float64')
- g_hat0 = np.full_like(y, np.nan, dtype='float64')
- g_hat1 = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
- p_hat = np.full_like(y, np.nan, dtype='float64')
+ u_hat0 = np.full_like(y, np.nan, dtype="float64")
+ u_hat1 = np.full_like(y, np.nan, dtype="float64")
+ g_hat0 = np.full_like(y, np.nan, dtype="float64")
+ g_hat1 = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
+ p_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
u_hat0[test_index] = y[test_index] - g_hat0_list[idx]
u_hat1[test_index] = y[test_index] - g_hat1_list[idx]
@@ -114,66 +129,94 @@ def compute_iivm_residuals(y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list,
m_hat[test_index] = m_hat_list[idx]
p_hat[test_index] = p_hat_list[idx]
- _check_is_propensity(m_hat, 'learner_m', 'ml_m', smpls, eps=1e-12)
+ _check_is_propensity(m_hat, "learner_m", "ml_m", smpls, eps=1e-12)
return u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat
def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, score, normalize_ipw):
n_obs = len(y)
u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals(
- y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls)
+ y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls
+ )
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, d)
else:
m_hat_adj = m_hat
- theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat,
- u_hat0, u_hat1, d, score)
- se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1,
- m_hat_adj, p_hat,
- u_hat0, u_hat1,
- d, score, n_obs))
+ theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score)
+ se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs))
return theta_hat, se
def var_irm(theta, g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, d, score, n_obs):
-
- if score == 'ATE':
- var = 1/n_obs * np.mean(np.power(g_hat1 - g_hat0
- + np.divide(np.multiply(d, u_hat1), m_hat)
- - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat) - theta, 2))
+ if score == "ATE":
+ var = (
+ 1
+ / n_obs
+ * np.mean(
+ np.power(
+ g_hat1
+ - g_hat0
+ + np.divide(np.multiply(d, u_hat1), m_hat)
+ - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat)
+ - theta,
+ 2,
+ )
+ )
+ )
else:
- assert score == 'ATTE'
- var = 1/n_obs * np.mean(np.power(np.divide(np.multiply(d, u_hat0), p_hat)
- - np.divide(np.multiply(m_hat, np.multiply(1.-d, u_hat0)),
- np.multiply(p_hat, (1.-m_hat)))
- - theta * np.divide(d, p_hat), 2)) \
+ assert score == "ATTE"
+ var = (
+ 1
+ / n_obs
+ * np.mean(
+ np.power(
+ np.divide(np.multiply(d, u_hat0), p_hat)
+ - np.divide(np.multiply(m_hat, np.multiply(1.0 - d, u_hat0)), np.multiply(p_hat, (1.0 - m_hat)))
+ - theta * np.divide(d, p_hat),
+ 2,
+ )
+ )
/ np.power(np.mean(np.divide(d, p_hat)), 2)
+ )
return var
def irm_orth(g_hat0, g_hat1, m_hat, p_hat, u_hat0, u_hat1, d, score):
-
- if score == 'ATE':
- res = np.mean(g_hat1 - g_hat0
- + np.divide(np.multiply(d, u_hat1), m_hat)
- - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat))
+ if score == "ATE":
+ res = np.mean(
+ g_hat1 - g_hat0 + np.divide(np.multiply(d, u_hat1), m_hat) - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat)
+ )
else:
- assert score == 'ATTE'
- res = np.mean(np.divide(np.multiply(d, u_hat0), p_hat)
- - np.divide(np.multiply(m_hat, np.multiply(1.-d, u_hat0)),
- np.multiply(p_hat, (1.-m_hat)))) \
- / np.mean(np.divide(d, p_hat))
+ assert score == "ATTE"
+ res = np.mean(
+ np.divide(np.multiply(d, u_hat0), p_hat)
+ - np.divide(np.multiply(m_hat, np.multiply(1.0 - d, u_hat0)), np.multiply(p_hat, (1.0 - m_hat)))
+ ) / np.mean(np.divide(d, p_hat))
return res
-def boot_irm(y, d, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_p_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, apply_cross_fitting=True, normalize_ipw=True):
+def boot_irm(
+ y,
+ d,
+ thetas,
+ ses,
+ all_g_hat0,
+ all_g_hat1,
+ all_m_hat,
+ all_p_hat,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ apply_cross_fitting=True,
+ normalize_ipw=True,
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -184,9 +227,21 @@ def boot_irm(y, d, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_p_hat,
n_obs = len(test_index)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_irm_single_split(
- thetas[i_rep], y, d,
- all_g_hat0[i_rep], all_g_hat1[i_rep], all_m_hat[i_rep], all_p_hat[i_rep], smpls,
- score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting, normalize_ipw)
+ thetas[i_rep],
+ y,
+ d,
+ all_g_hat0[i_rep],
+ all_g_hat1[i_rep],
+ all_m_hat[i_rep],
+ all_p_hat[i_rep],
+ smpls,
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ normalize_ipw,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -194,41 +249,61 @@ def boot_irm(y, d, thetas, ses, all_g_hat0, all_g_hat1, all_m_hat, all_p_hat,
return boot_t_stat
-def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list,
- smpls, score, se, weights, n_rep_boot, apply_cross_fitting, normalize_ipw):
+def boot_irm_single_split(
+ theta,
+ y,
+ d,
+ g_hat0_list,
+ g_hat1_list,
+ m_hat_list,
+ p_hat_list,
+ smpls,
+ score,
+ se,
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ normalize_ipw,
+):
u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals(
- y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls)
+ y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls
+ )
- m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64')
+ m_hat_adj = np.full_like(m_hat, np.nan, dtype="float64")
if normalize_ipw:
m_hat_adj = _normalize_ipw(m_hat, d)
else:
m_hat_adj = m_hat
if apply_cross_fitting:
- if score == 'ATE':
+ if score == "ATE":
J = -1.0
else:
- assert score == 'ATTE'
+ assert score == "ATTE"
J = np.mean(-np.divide(d, p_hat))
else:
test_index = smpls[0][1]
- if score == 'ATE':
+ if score == "ATE":
J = -1.0
else:
- assert score == 'ATTE'
+ assert score == "ATTE"
J = np.mean(-np.divide(d[test_index], p_hat[test_index]))
- if score == 'ATE':
- psi = g_hat1 - g_hat0 \
- + np.divide(np.multiply(d, u_hat1), m_hat_adj) \
- - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat_adj) - theta
+ if score == "ATE":
+ psi = (
+ g_hat1
+ - g_hat0
+ + np.divide(np.multiply(d, u_hat1), m_hat_adj)
+ - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat_adj)
+ - theta
+ )
else:
- assert score == 'ATTE'
- psi = np.divide(np.multiply(d, u_hat0), p_hat) \
- - np.divide(np.multiply(m_hat_adj, np.multiply(1.-d, u_hat0)),
- np.multiply(p_hat, (1.-m_hat_adj))) \
+ assert score == "ATTE"
+ psi = (
+ np.divide(np.multiply(d, u_hat0), p_hat)
+ - np.divide(np.multiply(m_hat_adj, np.multiply(1.0 - d, u_hat0)), np.multiply(p_hat, (1.0 - m_hat_adj)))
- theta * np.divide(d, p_hat)
+ )
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting)
@@ -245,33 +320,29 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep):
psi_nu2 = np.full(shape=(n_obs, n_rep, n_treat), fill_value=np.nan)
for i_rep in range(n_rep):
-        m_hat = predictions['ml_m'][:, i_rep, 0]
-        g_hat0 = predictions['ml_g0'][:, i_rep, 0]
-        g_hat1 = predictions['ml_g1'][:, i_rep, 0]
-
-        if score == 'ATE':
+        m_hat = predictions["ml_m"][:, i_rep, 0]
+        g_hat0 = predictions["ml_g0"][:, i_rep, 0]
+        g_hat1 = predictions["ml_g1"][:, i_rep, 0]
+
+        if score == "ATE":
weights = np.ones_like(d)
weights_bar = np.ones_like(d)
else:
- assert score == 'ATTE'
+ assert score == "ATTE"
weights = np.divide(d, np.mean(d))
weights_bar = np.divide(m_hat, np.mean(d))
- sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0))
+ sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0 - d, g_hat0))
sigma2[0, i_rep, 0] = np.mean(sigma2_score_element)
psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
# calc m(W,alpha) and Riesz representer
- m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0-m_hat))))
- rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0-d, 1.0-m_hat)))
+ m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0 - m_hat))))
+ rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0 - d, 1.0 - m_hat)))
nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
nu2[0, i_rep, 0] = np.mean(nu2_score_element)
psi_nu2[:, i_rep, 0] = nu2_score_element - nu2[0, i_rep, 0]
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2}
+ element_dict = {"sigma2": sigma2, "nu2": nu2, "psi_sigma2": psi_sigma2, "psi_nu2": psi_nu2}
return element_dict
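
The aggregation line `se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)` appears verbatim in `fit_irm` above and again in the pq/lpq/qte helpers below, so it is worth spelling out once. Across repeated sample splits r = 1, ..., n_rep it implements the median aggregation

```latex
\tilde\theta = \operatorname{med}_r \hat\theta_r,
\qquad
\widetilde{\operatorname{se}}
  = \sqrt{\operatorname{med}_r\Bigl(n\,\widehat{\operatorname{se}}_r^{\,2}
      + \bigl(\hat\theta_r - \tilde\theta\bigr)^2\Bigr) / n},
```

where the (θ̂_r − θ̃)² term inflates each repetition's variance by its disagreement with the median estimate.
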
diff --git a/doubleml/irm/tests/_utils_lpq_manual.py b/doubleml/irm/tests/_utils_lpq_manual.py
index ce401f26b..e24b5e92f 100644
--- a/doubleml/irm/tests/_utils_lpq_manual.py
+++ b/doubleml/irm/tests/_utils_lpq_manual.py
@@ -1,20 +1,33 @@
import numpy as np
-from sklearn.base import clone
-from sklearn.model_selection import train_test_split, StratifiedKFold
from scipy.optimize import root_scalar
+from sklearn.base import clone
+from sklearn.model_selection import StratifiedKFold, train_test_split
from ...tests._utils import tune_grid_search
-from ...utils._estimation import _dml_cv_predict, _trimm, _default_kde, _normalize_ipw, _get_bracket_guess, _solve_ipw_score
-
-
-def fit_lpq(y, x, d, z, quantile,
- learner_g, learner_m, all_smpls, treatment, n_rep=1,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- kde=_default_kde,
- normalize_ipw=True, m_z_params=None,
- m_d_z0_params=None, m_d_z1_params=None,
- g_du_z0_params=None, g_du_z1_params=None):
+from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score, _trimm
+
+
+def fit_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile,
+ learner_g,
+ learner_m,
+ all_smpls,
+ treatment,
+ n_rep=1,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ kde=_default_kde,
+ normalize_ipw=True,
+ m_z_params=None,
+ m_d_z0_params=None,
+ m_d_z1_params=None,
+ g_du_z0_params=None,
+ g_du_z1_params=None,
+):
n_obs = len(y)
lpqs = np.zeros(n_rep)
@@ -23,34 +36,57 @@ def fit_lpq(y, x, d, z, quantile,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- m_z_hat, g_du_z0_hat, g_du_z1_hat, \
- comp_prob_hat, ipw_vec, coef_bounds = fit_nuisance_lpq(y, x, d, z, quantile,
- learner_g, learner_m, smpls,
- treatment,
- trimming_rule=trimming_rule,
- trimming_threshold=trimming_threshold,
- normalize_ipw=normalize_ipw,
- m_z_params=m_z_params,
- m_d_z0_params=m_d_z0_params,
- m_d_z1_params=m_d_z1_params,
- g_du_z0_params=g_du_z0_params,
- g_du_z1_params=g_du_z1_params)
-
- lpqs[i_rep], ses[i_rep] = lpq_dml2(y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat,
- treatment, quantile, ipw_vec, coef_bounds, kde)
+ m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat, ipw_vec, coef_bounds = fit_nuisance_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile,
+ learner_g,
+ learner_m,
+ smpls,
+ treatment,
+ trimming_rule=trimming_rule,
+ trimming_threshold=trimming_threshold,
+ normalize_ipw=normalize_ipw,
+ m_z_params=m_z_params,
+ m_d_z0_params=m_d_z0_params,
+ m_d_z1_params=m_d_z1_params,
+ g_du_z0_params=g_du_z0_params,
+ g_du_z1_params=g_du_z1_params,
+ )
+
+ lpqs[i_rep], ses[i_rep] = lpq_dml2(
+ y, d, z, m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat, treatment, quantile, ipw_vec, coef_bounds, kde
+ )
lpq = np.median(lpqs)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(lpqs - lpq, 2)) / n_obs)
- res = {'lpq': lpq, 'se': se,
- 'lpqs': lpqs, 'ses': ses}
+ res = {"lpq": lpq, "se": se, "lpqs": lpqs, "ses": ses}
return res
-def fit_nuisance_lpq(y, x, d, z, quantile, learner_g, learner_m, smpls, treatment,
- trimming_rule, trimming_threshold, normalize_ipw, m_z_params,
- m_d_z0_params, m_d_z1_params, g_du_z0_params, g_du_z1_params):
+def fit_nuisance_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile,
+ learner_g,
+ learner_m,
+ smpls,
+ treatment,
+ trimming_rule,
+ trimming_threshold,
+ normalize_ipw,
+ m_z_params,
+ m_d_z0_params,
+ m_d_z1_params,
+ g_du_z0_params,
+ g_du_z1_params,
+):
n_folds = len(smpls)
n_obs = len(y)
# initialize starting values and bounds
@@ -90,10 +126,10 @@ def fit_nuisance_lpq(y, x, d, z, quantile, learner_g, learner_m, smpls, treatmen
test_inds = smpls[i_fold][1]
# start nested crossfitting
- train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
- random_state=42, stratify=strata[train_inds])
- smpls_prelim = [(train, test) for train, test in
- StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=strata[train_inds_1])]
+ train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds])
+ smpls_prelim = [
+ (train, test) for train, test in StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=strata[train_inds_1])
+ ]
d_train_1 = d[train_inds_1]
y_train_1 = y[train_inds_1]
@@ -103,8 +139,9 @@ def fit_nuisance_lpq(y, x, d, z, quantile, learner_g, learner_m, smpls, treatmen
# preliminary propensity for z
# todo change prediction method
ml_m_z_prelim = clone(ml_m_z)
- m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1,
- method='predict_proba', smpls=smpls_prelim)['preds']
+ m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim)[
+ "preds"
+ ]
m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, trimming_threshold)
if normalize_ipw:
@@ -125,15 +162,18 @@ def fit_nuisance_lpq(y, x, d, z, quantile, learner_g, learner_m, smpls, treatmen
m_d_z1_hat_prelim = ml_m_d_z1_prelim.predict_proba(x_train_1)[:, 1]
# preliminary estimate of theta_2_aux
- comp_prob_prelim = np.mean(m_d_z1_hat_prelim - m_d_z0_hat_prelim
- + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim)
- - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim))
+ comp_prob_prelim = np.mean(
+ m_d_z1_hat_prelim
+ - m_d_z0_hat_prelim
+ + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim)
+ - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim)
+ )
def ipw_score(theta):
sign = 2 * treatment - 1.0
weights = sign * (z_train_1 / m_z_hat_prelim - (1 - z_train_1) / (1 - m_z_hat_prelim)) / comp_prob_prelim
u = (d_train_1 == treatment) * (y_train_1 <= theta)
- v = -1. * quantile
+ v = -1.0 * quantile
res = np.mean(weights * u + v)
return res
@@ -187,9 +227,9 @@ def ipw_score(theta):
m_z_hat = _normalize_ipw(m_z_hat, z)
# estimate final nuisance parameter
- comp_prob_hat = np.mean(m_d_z1_hat - m_d_z0_hat
- + z / m_z_hat * (d - m_d_z1_hat)
- - (1 - z) / (1 - m_z_hat) * (d - m_d_z0_hat))
+ comp_prob_hat = np.mean(
+ m_d_z1_hat - m_d_z0_hat + z / m_z_hat * (d - m_d_z1_hat) - (1 - z) / (1 - m_z_hat) * (d - m_d_z0_hat)
+ )
return m_z_hat, g_du_z0_hat, g_du_z1_hat, comp_prob_hat, ipw_vec, coef_bounds
@@ -204,7 +244,6 @@ def lpq_dml2(y, d, z, m_z, g_du_z0, g_du_z1, comp_prob, treatment, quantile, ipw
def lpq_est(m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quantile, ipw_est, coef_bounds):
-
def compute_score(coef):
sign = 2 * treatment - 1.0
score1 = g_du_z1 - g_du_z0
@@ -227,14 +266,12 @@ def get_bracket_guess(coef_start, coef_bounds):
b_guess = (a, b)
f_a = compute_score(b_guess[0])
f_b = compute_score(b_guess[1])
- s_different = (np.sign(f_a) != np.sign(f_b))
+ s_different = np.sign(f_a) != np.sign(f_b)
delta += 0.1
return s_different, b_guess
_, bracket_guess = get_bracket_guess(ipw_est, coef_bounds)
- root_res = root_scalar(compute_score_mean,
- bracket=bracket_guess,
- method='brentq')
+ root_res = root_scalar(compute_score_mean, bracket=bracket_guess, method="brentq")
dml_est = root_res.root
return dml_est
@@ -251,15 +288,30 @@ def lpq_var_est(coef, m_z, g_du_z0, g_du_z1, comp_prob, d, y, z, treatment, quan
score2 = (z / m_z) * ((d == treatment) * (y <= coef) - g_du_z1)
score3 = (1 - z) / (1 - m_z) * ((d == treatment) * (y <= coef) - g_du_z0)
score = sign * (score1 + score2 - score3) / comp_prob - quantile
- var_est = 1/n_obs * np.mean(np.square(score)) / np.square(J)
+ var_est = 1 / n_obs * np.mean(np.square(score)) / np.square(J)
return var_est
-def tune_nuisance_lpq(y, x, d, z,
- ml_m_z, ml_m_d_z0, ml_m_d_z1, ml_g_du_z0, ml_g_du_z1,
- smpls, treatment, quantile, n_folds_tune,
- param_grid_m_z, param_grid_m_d_z0, param_grid_m_d_z1,
- param_grid_g_du_z0, param_grid_g_du_z1):
+def tune_nuisance_lpq(
+ y,
+ x,
+ d,
+ z,
+ ml_m_z,
+ ml_m_d_z0,
+ ml_m_d_z1,
+ ml_g_du_z0,
+ ml_g_du_z1,
+ smpls,
+ treatment,
+ quantile,
+ n_folds_tune,
+ param_grid_m_z,
+ param_grid_m_d_z0,
+ param_grid_m_d_z1,
+ param_grid_g_du_z0,
+ param_grid_g_du_z1,
+):
train_cond_z0 = np.where(z == 0)[0]
train_cond_z1 = np.where(z == 1)[0]
@@ -267,14 +319,10 @@ def tune_nuisance_lpq(y, x, d, z,
du = (d == treatment) * (y <= approx_quant)
m_z_tune_res = tune_grid_search(z, x, ml_m_z, smpls, param_grid_m_z, n_folds_tune)
- m_d_z0_tune_res = tune_grid_search(d, x, ml_m_d_z0, smpls, param_grid_m_d_z0, n_folds_tune,
- train_cond=train_cond_z0)
- m_d_z1_tune_res = tune_grid_search(d, x, ml_m_d_z1, smpls, param_grid_m_d_z1, n_folds_tune,
- train_cond=train_cond_z1)
- g_du_z0_tune_res = tune_grid_search(du, x, ml_g_du_z0, smpls, param_grid_g_du_z0, n_folds_tune,
- train_cond=train_cond_z0)
- g_du_z1_tune_res = tune_grid_search(du, x, ml_g_du_z1, smpls, param_grid_g_du_z1, n_folds_tune,
- train_cond=train_cond_z1)
+ m_d_z0_tune_res = tune_grid_search(d, x, ml_m_d_z0, smpls, param_grid_m_d_z0, n_folds_tune, train_cond=train_cond_z0)
+ m_d_z1_tune_res = tune_grid_search(d, x, ml_m_d_z1, smpls, param_grid_m_d_z1, n_folds_tune, train_cond=train_cond_z1)
+ g_du_z0_tune_res = tune_grid_search(du, x, ml_g_du_z0, smpls, param_grid_g_du_z0, n_folds_tune, train_cond=train_cond_z0)
+ g_du_z1_tune_res = tune_grid_search(du, x, ml_g_du_z1, smpls, param_grid_g_du_z1, n_folds_tune, train_cond=train_cond_z1)
m_z_best_params = [xx.best_params_ for xx in m_z_tune_res]
m_d_z0_best_params = [xx.best_params_ for xx in m_d_z0_tune_res]
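
In `fit_nuisance_lpq`, each training fold is split in half for nested cross-fitting, and `ipw_score` (shown in the hunks above) defines a plain IPW estimating equation for the local quantile on the first half. With treatment arm t, sign s = 2t − 1, quantile level τ, and γ̂ the preliminary complier probability `comp_prob_prelim`, the root solved for the starting values collected in `ipw_vec` is

```latex
\frac{1}{|I_1|} \sum_{i \in I_1}
  \frac{s}{\hat\gamma}
  \Bigl(\frac{z_i}{\hat m_z(x_i)} - \frac{1 - z_i}{1 - \hat m_z(x_i)}\Bigr)
  \mathbf{1}\{d_i = t\}\,\mathbf{1}\{y_i \le \theta\} \;-\; \tau \;=\; 0 .
```

This is a transcription of the `weights * u + v` expression in the diff, not an independent derivation.
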
diff --git a/doubleml/irm/tests/_utils_pq_manual.py b/doubleml/irm/tests/_utils_pq_manual.py
index 93d43a4aa..3bcd13958 100644
--- a/doubleml/irm/tests/_utils_pq_manual.py
+++ b/doubleml/irm/tests/_utils_pq_manual.py
@@ -1,15 +1,27 @@
import numpy as np
-from sklearn.base import clone
-from sklearn.model_selection import train_test_split, StratifiedKFold
from scipy.optimize import root_scalar
+from sklearn.base import clone
+from sklearn.model_selection import StratifiedKFold, train_test_split
from ...tests._utils import tune_grid_search
-from ...utils._estimation import _dml_cv_predict, _default_kde, _normalize_ipw, _solve_ipw_score, _get_bracket_guess
-
-
-def fit_pq(y, x, d, quantile,
- learner_g, learner_m, all_smpls, treatment, n_rep=1,
- trimming_threshold=1e-2, normalize_ipw=True, g_params=None, m_params=None):
+from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score
+
+
+def fit_pq(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g,
+ learner_m,
+ all_smpls,
+ treatment,
+ n_rep=1,
+ trimming_threshold=1e-2,
+ normalize_ipw=True,
+ g_params=None,
+ m_params=None,
+):
n_obs = len(y)
pqs = np.zeros(n_rep)
@@ -18,25 +30,34 @@ def fit_pq(y, x, d, quantile,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat, m_hat, ipw_est = fit_nuisance_pq(y, x, d, quantile,
- learner_g, learner_m, smpls, treatment,
- trimming_threshold=trimming_threshold,
- normalize_ipw=normalize_ipw,
- g_params=g_params, m_params=m_params)
+ g_hat, m_hat, ipw_est = fit_nuisance_pq(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g,
+ learner_m,
+ smpls,
+ treatment,
+ trimming_threshold=trimming_threshold,
+ normalize_ipw=normalize_ipw,
+ g_params=g_params,
+ m_params=m_params,
+ )
pqs[i_rep], ses[i_rep] = pq_dml2(y, d, g_hat, m_hat, treatment, quantile, ipw_est)
pq = np.median(pqs)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(pqs - pq, 2)) / n_obs)
- res = {'pq': pq, 'se': se,
- 'pqs': pqs, 'ses': ses}
+ res = {"pq": pq, "se": se, "pqs": pqs, "ses": ses}
return res
-def fit_nuisance_pq(y, x, d, quantile, learner_g, learner_m, smpls, treatment,
- trimming_threshold, normalize_ipw, g_params, m_params):
+def fit_nuisance_pq(
+ y, x, d, quantile, learner_g, learner_m, smpls, treatment, trimming_threshold, normalize_ipw, g_params, m_params
+):
n_folds = len(smpls)
n_obs = len(y)
# initialize starting values and bounds
@@ -62,18 +83,17 @@ def fit_nuisance_pq(y, x, d, quantile, learner_g, learner_m, smpls, treatment,
test_inds = smpls[i_fold][1]
# start nested crossfitting
- train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
- random_state=42, stratify=d[train_inds])
- smpls_prelim = [(train, test) for train, test in
- StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=d[train_inds_1])]
+ train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, random_state=42, stratify=d[train_inds])
+ smpls_prelim = [
+ (train, test) for train, test in StratifiedKFold(n_splits=n_folds).split(X=train_inds_1, y=d[train_inds_1])
+ ]
d_train_1 = d[train_inds_1]
y_train_1 = y[train_inds_1]
x_train_1 = x[train_inds_1, :]
# todo change prediction method
- m_hat_prelim = _dml_cv_predict(clone(ml_m), x_train_1, d_train_1,
- method='predict_proba', smpls=smpls_prelim)['preds']
+ m_hat_prelim = _dml_cv_predict(clone(ml_m), x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"]
m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold
m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold
@@ -146,7 +166,7 @@ def get_bracket_guess(coef_start, coef_bounds):
b_guess = (a, b)
f_a = compute_score(b_guess[0])
f_b = compute_score(b_guess[1])
- s_different = (np.sign(f_a) != np.sign(f_b))
+ s_different = np.sign(f_a) != np.sign(f_b)
delta += 0.1
return s_different, b_guess
@@ -154,9 +174,7 @@ def get_bracket_guess(coef_start, coef_bounds):
coef_bounds = (y.min(), y.max())
_, bracket_guess = get_bracket_guess(coef_start_val, coef_bounds)
- root_res = root_scalar(compute_score,
- bracket=bracket_guess,
- method='brentq')
+ root_res = root_scalar(compute_score, bracket=bracket_guess, method="brentq")
dml_est = root_res.root
return dml_est
@@ -169,16 +187,14 @@ def pq_var_est(coef, g_hat, m_hat, d, y, treatment, quantile, n_obs, kde=_defaul
J = np.mean(deriv)
score = (d == treatment) * ((y <= coef) - g_hat) / m_hat + g_hat - quantile
- var_est = 1/n_obs * np.mean(np.square(score)) / np.square(J)
+ var_est = 1 / n_obs * np.mean(np.square(score)) / np.square(J)
return var_est
-def tune_nuisance_pq(y, x, d, ml_g, ml_m, smpls, treatment, quantile, n_folds_tune,
- param_grid_g, param_grid_m):
+def tune_nuisance_pq(y, x, d, ml_g, ml_m, smpls, treatment, quantile, n_folds_tune, param_grid_g, param_grid_m):
train_cond_treat = np.where(d == treatment)[0]
approx_goal = y <= np.quantile(y[d == treatment], quantile)
- g_tune_res = tune_grid_search(approx_goal, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=train_cond_treat)
+ g_tune_res = tune_grid_search(approx_goal, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond_treat)
m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune)
g_best_params = [xx.best_params_ for xx in g_tune_res]
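
For the potential-quantile helpers, `pq_var_est` (partly visible in the last hunk of `_utils_pq_manual.py`) is again a sandwich estimator. With treatment arm t and quantile level τ, the score and variance read off the diff are

```latex
\psi_i = \frac{\mathbf{1}\{d_i = t\}\bigl(\mathbf{1}\{y_i \le \hat\theta_\tau\} - \hat g(x_i)\bigr)}{\hat m(x_i)}
         + \hat g(x_i) - \tau,
\qquad
\widehat{\operatorname{Var}}(\hat\theta_\tau)
  = \frac{1}{n}\,\frac{\tfrac{1}{n}\sum_i \psi_i^2}{\hat J^{\,2}},
```

where Ĵ = `np.mean(deriv)` as in the hunk, with `deriv` a kernel-density estimate of the score's derivative computed outside the visible context.
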
diff --git a/doubleml/irm/tests/_utils_qte_manual.py b/doubleml/irm/tests/_utils_qte_manual.py
index 5c177907c..25de79cd9 100644
--- a/doubleml/irm/tests/_utils_qte_manual.py
+++ b/doubleml/irm/tests/_utils_qte_manual.py
@@ -1,17 +1,27 @@
import numpy as np
from sklearn.base import clone
-from ..pq import DoubleMLPQ
from ...double_ml_data import DoubleMLData
-
from ...tests._utils_boot import draw_weights
from ...utils._estimation import _default_kde
+from ..pq import DoubleMLPQ
-def fit_qte(y, x, d, quantiles, learner_g, learner_m, all_smpls, n_rep=1,
- trimming_rule='truncate', trimming_threshold=1e-2, kde=_default_kde,
- normalize_ipw=True, draw_sample_splitting=True):
-
+def fit_qte(
+ y,
+ x,
+ d,
+ quantiles,
+ learner_g,
+ learner_m,
+ all_smpls,
+ n_rep=1,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ kde=_default_kde,
+ normalize_ipw=True,
+ draw_sample_splitting=True,
+):
n_obs = len(y)
n_quantiles = len(quantiles)
n_folds = len(all_smpls[0])
@@ -25,30 +35,34 @@ def fit_qte(y, x, d, quantiles, learner_g, learner_m, all_smpls, n_rep=1,
for i_quant in range(n_quantiles):
# initialize models for both potential quantiles
- model_PQ_0 = DoubleMLPQ(dml_data,
- clone(learner_g),
- clone(learner_m),
- quantile=quantiles[i_quant],
- treatment=0,
- n_folds=n_folds,
- n_rep=n_rep,
- trimming_rule=trimming_rule,
- trimming_threshold=trimming_threshold,
- kde=kde,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
- model_PQ_1 = DoubleMLPQ(dml_data,
- clone(learner_g),
- clone(learner_m),
- quantile=quantiles[i_quant],
- treatment=1,
- n_folds=n_folds,
- n_rep=n_rep,
- trimming_rule=trimming_rule,
- trimming_threshold=trimming_threshold,
- kde=kde,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ model_PQ_0 = DoubleMLPQ(
+ dml_data,
+ clone(learner_g),
+ clone(learner_m),
+ quantile=quantiles[i_quant],
+ treatment=0,
+ n_folds=n_folds,
+ n_rep=n_rep,
+ trimming_rule=trimming_rule,
+ trimming_threshold=trimming_threshold,
+ kde=kde,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
+ model_PQ_1 = DoubleMLPQ(
+ dml_data,
+ clone(learner_g),
+ clone(learner_m),
+ quantile=quantiles[i_quant],
+ treatment=1,
+ n_folds=n_folds,
+ n_rep=n_rep,
+ trimming_rule=trimming_rule,
+ trimming_threshold=trimming_threshold,
+ kde=kde,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
model_PQ_0.set_sample_splitting(all_smpls)
@@ -76,12 +90,11 @@ def fit_qte(y, x, d, quantiles, learner_g, learner_m, all_smpls, n_rep=1,
qte = np.median(qtes, 1)
se = np.zeros(n_quantiles)
for i_quant in range(n_quantiles):
- se[i_quant] = np.sqrt(np.median(np.power(ses[i_quant, :], 2) * n_obs +
- np.power(qtes[i_quant, :] - qte[i_quant], 2)) / n_obs)
+ se[i_quant] = np.sqrt(
+ np.median(np.power(ses[i_quant, :], 2) * n_obs + np.power(qtes[i_quant, :] - qte[i_quant], 2)) / n_obs
+ )
- res = {'qte': qte, 'se': se,
- 'qtes': qtes, 'ses': ses,
- 'scaled_scores': scaled_scores}
+ res = {"qte": qte, "se": se, "qtes": qtes, "ses": ses, "scaled_scores": scaled_scores}
return res
@@ -93,7 +106,8 @@ def boot_qte(scaled_scores, ses, quantiles, all_smpls, n_rep, bootstrap, n_rep_b
n_obs = scaled_scores.shape[0]
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
for i_quant in range(n_quantiles):
- boot_t_stat[:, i_quant, i_rep] = np.matmul(weights, scaled_scores[:, i_quant, i_rep]) / \
- (n_obs * ses[i_quant, i_rep])
+ boot_t_stat[:, i_quant, i_rep] = np.matmul(weights, scaled_scores[:, i_quant, i_rep]) / (
+ n_obs * ses[i_quant, i_rep]
+ )
return boot_t_stat
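
`boot_qte` condenses the multiplier bootstrap into the reflowed matmul at the end of the file: for bootstrap draw b with weights ξ drawn by `draw_weights`,

```latex
t^{*}_{b} = \frac{\sum_{i=1}^{n} \xi_{b,i}\,\bar\psi_i}{n \cdot \widehat{\operatorname{se}}},
```

with ψ̄ the entries of `scaled_scores`, which are filled outside the visible hunks. Only the layout of this computation changed.
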
diff --git a/doubleml/irm/tests/_utils_ssm_manual.py b/doubleml/irm/tests/_utils_ssm_manual.py
index 2d3683191..1ce4a97bc 100644
--- a/doubleml/irm/tests/_utils_ssm_manual.py
+++ b/doubleml/irm/tests/_utils_ssm_manual.py
@@ -6,15 +6,26 @@
from ...utils._estimation import _predict_zero_one_propensity, _trimm
-def fit_selection(y, x, d, z, s,
- learner_g, learner_pi, learner_m,
- all_smpls, score,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- normalize_ipw=True,
- n_rep=1,
- g_d0_params=None, g_d1_params=None,
- pi_params=None, m_params=None):
+def fit_selection(
+ y,
+ x,
+ d,
+ z,
+ s,
+ learner_g,
+ learner_pi,
+ learner_m,
+ all_smpls,
+ score,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ normalize_ipw=True,
+ n_rep=1,
+ g_d0_params=None,
+ g_d1_params=None,
+ pi_params=None,
+ m_params=None,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -31,14 +42,24 @@ def fit_selection(y, x, d, z, s,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- g_hat_d1_list, g_hat_d0_list, pi_hat_list, \
- m_hat_list = fit_nuisance_selection(y, x, d, z, s,
- learner_g, learner_pi, learner_m,
- smpls, score,
- trimming_rule=trimming_rule,
- trimming_threshold=trimming_threshold,
- g_d0_params=g_d0_params, g_d1_params=g_d1_params,
- pi_params=pi_params, m_params=m_params)
+ g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list = fit_nuisance_selection(
+ y,
+ x,
+ d,
+ z,
+ s,
+ learner_g,
+ learner_pi,
+ learner_m,
+ smpls,
+ score,
+ trimming_rule=trimming_rule,
+ trimming_threshold=trimming_threshold,
+ g_d0_params=g_d0_params,
+ g_d1_params=g_d1_params,
+ pi_params=pi_params,
+ m_params=m_params,
+ )
all_g_d1_hat.append(g_hat_d1_list)
all_g_d0_hat.append(g_hat_d0_list)
all_pi_hat.append(pi_hat_list)
@@ -46,10 +67,9 @@ def fit_selection(y, x, d, z, s,
g_hat_d1, g_hat_d0, pi_hat, m_hat = compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls)
- dtreat = (d == 1)
- dcontrol = (d == 0)
- psi_a, psi_b = selection_score_elements(dtreat, dcontrol, g_hat_d1, g_hat_d0, pi_hat, m_hat,
- s, y, normalize_ipw)
+ dtreat = d == 1
+ dcontrol = d == 0
+ psi_a, psi_b = selection_score_elements(dtreat, dcontrol, g_hat_d1, g_hat_d0, pi_hat, m_hat, s, y, normalize_ipw)
all_psi_a.append(psi_a)
all_psi_b.append(psi_b)
@@ -59,23 +79,40 @@ def fit_selection(y, x, d, z, s,
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_g_d1_hat': all_g_d1_hat, 'all_g_d0_hat': all_g_d0_hat,
- 'all_pi_hat': all_pi_hat, 'all_m_hat': all_m_hat,
- 'all_psi_a': all_psi_a, 'all_psi_b': all_psi_b}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_g_d1_hat": all_g_d1_hat,
+ "all_g_d0_hat": all_g_d0_hat,
+ "all_pi_hat": all_pi_hat,
+ "all_m_hat": all_m_hat,
+ "all_psi_a": all_psi_a,
+ "all_psi_b": all_psi_b,
+ }
return res
-def fit_nuisance_selection(y, x, d, z, s,
- learner_g, learner_pi, learner_m,
- smpls, score,
- trimming_rule='truncate',
- trimming_threshold=1e-2,
- g_d0_params=None, g_d1_params=None,
- pi_params=None, m_params=None):
-
+def fit_nuisance_selection(
+ y,
+ x,
+ d,
+ z,
+ s,
+ learner_g,
+ learner_pi,
+ learner_m,
+ smpls,
+ score,
+ trimming_rule="truncate",
+ trimming_threshold=1e-2,
+ g_d0_params=None,
+ g_d1_params=None,
+ pi_params=None,
+ m_params=None,
+):
ml_g_d1 = clone(learner_g)
ml_g_d0 = clone(learner_g)
ml_pi = clone(learner_pi)
@@ -86,7 +123,7 @@ def fit_nuisance_selection(y, x, d, z, s,
else:
dx = np.column_stack((d, x, z))
- if score == 'missing-at-random':
+ if score == "missing-at-random":
pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold)
m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls)
@@ -126,8 +163,9 @@ def fit_nuisance_selection(y, x, d, z, s,
test_inds = smpls[i_fold][1]
# start nested crossfitting
- train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5,
- random_state=42, stratify=strata[train_inds])
+ train_inds_1, train_inds_2 = train_test_split(
+ train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds]
+ )
s_train_1 = s[train_inds_1]
dx_train_1 = dx[train_inds_1, :]
@@ -154,8 +192,7 @@ def fit_nuisance_selection(y, x, d, z, s,
m_hat = _predict_zero_one_propensity(ml_m, xpi_test)
# estimate conditional outcome on second training sample -- treatment
- s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0],
- np.intersect1d(np.where(s == 1)[0], train_inds_2))
+ s1_d1_train_2_indices = np.intersect1d(np.where(d == 1)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2))
xpi_s1_d1_train_2 = xpi[s1_d1_train_2_indices, :]
y_s1_d1_train_2 = y[s1_d1_train_2_indices]
@@ -165,8 +202,7 @@ def fit_nuisance_selection(y, x, d, z, s,
g_hat_d1 = ml_g_d1.predict(xpi_test)
# estimate conditional outcome on second training sample -- control
- s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0],
- np.intersect1d(np.where(s == 1)[0], train_inds_2))
+ s1_d0_train_2_indices = np.intersect1d(np.where(d == 0)[0], np.intersect1d(np.where(s == 1)[0], train_inds_2))
xpi_s1_d0_train_2 = xpi[s1_d0_train_2_indices, :]
y_s1_d0_train_2 = y[s1_d0_train_2_indices]
@@ -187,10 +223,10 @@ def fit_nuisance_selection(y, x, d, z, s,
def compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list, smpls):
- g_hat_d1 = np.full_like(y, np.nan, dtype='float64')
- g_hat_d0 = np.full_like(y, np.nan, dtype='float64')
- pi_hat = np.full_like(y, np.nan, dtype='float64')
- m_hat = np.full_like(y, np.nan, dtype='float64')
+ g_hat_d1 = np.full_like(y, np.nan, dtype="float64")
+ g_hat_d0 = np.full_like(y, np.nan, dtype="float64")
+ pi_hat = np.full_like(y, np.nan, dtype="float64")
+ m_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
g_hat_d1[test_index] = g_hat_d1_list[idx]
@@ -201,8 +237,7 @@ def compute_selection(y, g_hat_d1_list, g_hat_d0_list, pi_hat_list, m_hat_list,
return g_hat_d1, g_hat_d0, pi_hat, m_hat
-def selection_score_elements(dtreat, dcontrol, g_d1, g_d0,
- pi, m, s, y, normalize_ipw):
+def selection_score_elements(dtreat, dcontrol, g_d1, g_d0, pi, m, s, y, normalize_ipw):
# psi_a
psi_a = -1 * np.ones_like(y)
@@ -225,7 +260,7 @@ def selection_score_elements(dtreat, dcontrol, g_d1, g_d0,
def selection_dml2(psi_a, psi_b):
n_obs = len(psi_a)
- theta_hat = - np.mean(psi_b) / np.mean(psi_a)
+ theta_hat = -np.mean(psi_b) / np.mean(psi_a)
se = np.sqrt(var_selection(theta_hat, psi_a, psi_b, n_obs))
return theta_hat, se
@@ -233,21 +268,18 @@ def selection_dml2(psi_a, psi_b):
def var_selection(theta, psi_a, psi_b, n_obs):
J = np.mean(psi_a)
- var = 1/n_obs * np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2)
+ var = 1 / n_obs * np.mean(np.power(np.multiply(psi_a, theta) + psi_b, 2)) / np.power(J, 2)
return var
-def tune_nuisance_ssm(y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, score, n_folds_tune,
- param_grid_g, param_grid_pi, param_grid_m):
+def tune_nuisance_ssm(y, x, d, z, s, ml_g, ml_pi, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_pi, param_grid_m):
d0_s1 = np.intersect1d(np.where(d == 0)[0], np.where(s == 1)[0])
d1_s1 = np.intersect1d(np.where(d == 1)[0], np.where(s == 1)[0])
- g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=d0_s1)
- g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune,
- train_cond=d1_s1)
+ g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d0_s1)
+ g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=d1_s1)
- if score == 'nonignorable':
+ if score == "nonignorable":
dx = np.column_stack((x, d, z))
else:
dx = np.column_stack((x, d))
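
`selection_dml2` and `var_selection` above make explicit that the SSM helper, like the other manual utilities, solves a score that is linear in θ, i.e. ψ = θ·ψ_a + ψ_b with ψ_a ≡ −1 here:

```latex
\hat\theta = -\frac{\bar\psi_b}{\bar\psi_a},
\qquad
\widehat{\operatorname{Var}}(\hat\theta)
  = \frac{1}{n}\,\frac{\tfrac{1}{n}\sum_i \bigl(\psi_{a,i}\,\hat\theta + \psi_{b,i}\bigr)^2}{\bar\psi_a^{\,2}},
```

matching `theta_hat = -np.mean(psi_b) / np.mean(psi_a)` and the `var_selection` expression in the hunks above.
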
diff --git a/doubleml/irm/tests/conftest.py b/doubleml/irm/tests/conftest.py
index 6fe207b06..1cf1d5250 100644
--- a/doubleml/irm/tests/conftest.py
+++ b/doubleml/irm/tests/conftest.py
@@ -1,20 +1,17 @@
import numpy as np
import pandas as pd
-
import pytest
from scipy.linalg import toeplitz
-
from sklearn.datasets import make_spd_matrix
-from doubleml.datasets import make_irm_data, make_iivm_data
+
+from doubleml.datasets import make_iivm_data, make_irm_data
def _g(x):
return np.power(np.sin(x), 2)
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20)])
def generate_data_irm(request):
n_p = request.param
np.random.seed(1111)
@@ -24,15 +21,12 @@ def generate_data_irm(request):
theta = 0.5
# generating data
- data = make_irm_data(n, p, theta, return_type='array')
+ data = make_irm_data(n, p, theta, return_type="array")
return data
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20),
- (1000, 100)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)])
def generate_data_irm_binary(request):
n_p = request.param
np.random.seed(1111)
@@ -44,21 +38,51 @@ def generate_data_irm_binary(request):
sigma = make_spd_matrix(p)
# generating data
- x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
+ x = np.random.multivariate_normal(
+ np.zeros(p),
+ sigma,
+ size=[
+ n,
+ ],
+ )
G = _g(np.dot(x, b))
- pr = 1 / (1 + np.exp((-1) * (x[:, 0] * (-0.5) + x[:, 1] * 0.5 + np.random.standard_normal(size=[n, ]))))
- d = np.random.binomial(p=pr, n=1, size=[n, ])
+ pr = 1 / (
+ 1
+ + np.exp(
+ (-1)
+ * (
+ x[:, 0] * (-0.5)
+ + x[:, 1] * 0.5
+ + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ )
+ )
+ )
+ d = np.random.binomial(
+ p=pr,
+ n=1,
+ size=[
+ n,
+ ],
+ )
err = np.random.standard_normal(n)
pry = 1 / (1 + np.exp((-1) * theta * d + G + err))
- y = np.random.binomial(p=pry, n=1, size=[n, ])
+ y = np.random.binomial(
+ p=pry,
+ n=1,
+ size=[
+ n,
+ ],
+ )
return x, y, d
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20)])
def generate_data_irm_w_missings(request):
n_p = request.param
np.random.seed(1111)
@@ -68,19 +92,17 @@ def generate_data_irm_w_missings(request):
theta = 0.5
# generating data
- (x, y, d) = make_irm_data(n, p, theta, return_type='array')
+ (x, y, d) = make_irm_data(n, p, theta, return_type="array")
# randomly set some entries to np.nan
- ind = np.random.choice(np.arange(x.size), replace=False,
- size=int(x.size * 0.05))
+ ind = np.random.choice(np.arange(x.size), replace=False, size=int(x.size * 0.05))
x[np.unravel_index(ind, x.shape)] = np.nan
data = (x, y, d)
return data
-@pytest.fixture(scope='session',
- params=[(500, 11)])
+@pytest.fixture(scope="session", params=[(500, 11)])
def generate_data_iivm(request):
n_p = request.param
np.random.seed(1111)
@@ -96,10 +118,7 @@ def generate_data_iivm(request):
return data
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20),
- (1000, 100)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)])
def generate_data_iivm_binary(request):
n_p = request.param
np.random.seed(1111)
@@ -111,26 +130,81 @@ def generate_data_iivm_binary(request):
sigma = make_spd_matrix(p)
# generating data
- x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
+ x = np.random.multivariate_normal(
+ np.zeros(p),
+ sigma,
+ size=[
+ n,
+ ],
+ )
G = _g(np.dot(x, b))
- prz = 1 / (1 + np.exp((-1) * (x[:, 0] * (-1) * b[4] + x[:, 1] * b[2] + np.random.standard_normal(size=[n, ]))))
- z = np.random.binomial(p=prz, n=1, size=[n, ])
- u = np.random.standard_normal(size=[n, ])
- pr = 1 / (1 + np.exp((-1) * (0.5 * z + x[:, 0] * (-0.5) + x[:, 1] * 0.25 - 0.5 * u
- + np.random.standard_normal(size=[n, ]))))
- d = np.random.binomial(p=pr, n=1, size=[n, ])
+ prz = 1 / (
+ 1
+ + np.exp(
+ (-1)
+ * (
+ x[:, 0] * (-1) * b[4]
+ + x[:, 1] * b[2]
+ + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ )
+ )
+ )
+ z = np.random.binomial(
+ p=prz,
+ n=1,
+ size=[
+ n,
+ ],
+ )
+ u = np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ pr = 1 / (
+ 1
+ + np.exp(
+ (-1)
+ * (
+ 0.5 * z
+ + x[:, 0] * (-0.5)
+ + x[:, 1] * 0.25
+ - 0.5 * u
+ + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ )
+ )
+ )
+ d = np.random.binomial(
+ p=pr,
+ n=1,
+ size=[
+ n,
+ ],
+ )
err = np.random.standard_normal(n)
pry = 1 / (1 + np.exp((-1) * theta * d + G + 4 * u + err))
- y = np.random.binomial(p=pry, n=1, size=[n, ])
+ y = np.random.binomial(
+ p=pry,
+ n=1,
+ size=[
+ n,
+ ],
+ )
return x, y, d, z
-@pytest.fixture(scope='session',
- params=[(500, 5),
- (1000, 10)])
+@pytest.fixture(scope="session", params=[(500, 5), (1000, 10)])
def generate_data_quantiles(request):
n_p = request.param
np.random.seed(1111)
@@ -157,9 +231,7 @@ def f_scale(D, X):
return data
-@pytest.fixture(scope='session',
- params=[(5000, 5),
- (10000, 10)])
+@pytest.fixture(scope="session", params=[(5000, 5), (10000, 10)])
def generate_data_local_quantiles(request):
n_p = request.param
np.random.seed(1111)
@@ -187,15 +259,13 @@ def generate_treatment(Z, X, X_conf):
d = generate_treatment(z, x, x_conf)
epsilon = np.random.normal(size=n)
- y = f_loc(d, x, x_conf) + f_scale(d, x, x_conf)*epsilon
+ y = f_loc(d, x, x_conf) + f_scale(d, x, x_conf) * epsilon
data = (x, y, d, z)
return data
-@pytest.fixture(scope='session',
- params=[(8000, 2),
- (16000, 5)])
+@pytest.fixture(scope="session", params=[(8000, 2), (16000, 5)])
def generate_data_selection_mar(request):
params = request.param
np.random.seed(1111)
@@ -207,7 +277,13 @@ def generate_data_selection_mar(request):
e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T
cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
beta = [0.4 / (k**2) for k in range(1, dim_x + 1)]
@@ -223,9 +299,7 @@ def generate_data_selection_mar(request):
return data
-@pytest.fixture(scope='session',
- params=[(8000, 2),
- (16000, 5)])
+@pytest.fixture(scope="session", params=[(8000, 2), (16000, 5)])
def generate_data_selection_nonignorable(request):
params = request.param
np.random.seed(1111)
@@ -238,7 +312,13 @@ def generate_data_selection_nonignorable(request):
e = np.random.multivariate_normal(mean=[0, 0], cov=sigma, size=n_obs).T
cov_mat = toeplitz([np.power(0.5, k) for k in range(dim_x)])
- x = np.random.multivariate_normal(np.zeros(dim_x), cov_mat, size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ cov_mat,
+ size=[
+ n_obs,
+ ],
+ )
beta = [0.4 / (k**2) for k in range(1, dim_x + 1)]
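
A note on the conspicuous expansions in `conftest.py` above (`size=[n, ]` growing to five lines): this is black's "magic trailing comma", not noise. A trailing comma inside brackets tells black to keep that collection exploded one element per line. A minimal self-contained sketch of the behaviour — the data are illustrative, only the formatting contrast matters:

```python
import numpy as np

n, p = 500, 10
sigma = np.eye(p)

# Written with a trailing comma inside the size list ...
x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])

# ... black keeps the comma and explodes the call, which is exactly the
# layout the conftest.py hunks above show:
x = np.random.multivariate_normal(
    np.zeros(p),
    sigma,
    size=[
        n,
    ],
)

# Writing size=[n] (or size=n) instead would let black keep the call on one line.
```
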
diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py
index 700db40a5..920f60476 100644
--- a/doubleml/irm/tests/test_apo.py
+++ b/doubleml/irm/tests/test_apo.py
@@ -1,50 +1,51 @@
+import math
+
import numpy as np
import pandas as pd
import pytest
-import math
-
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
-from doubleml.datasets import make_irm_data_discrete_treatments, make_irm_data
+from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments
from ...tests._utils import draw_smpls
-from ._utils_apo_manual import fit_apo, boot_apo, fit_sensitivity_elements_apo
-
-
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250, random_state=42)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250, random_state=42)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment_level(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshold, treatment_level):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -55,153 +56,169 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol
np.random.seed(3141)
n_obs = 500
data_apo = make_irm_data_discrete_treatments(n_obs=n_obs)
- y = data_apo['y']
- x = data_apo['x']
- d = data_apo['d']
+ y = data_apo["y"]
+ x = data_apo["x"]
+ d = data_apo["d"]
df_apo = pd.DataFrame(
- np.column_stack((y, d, x)),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])]
+ np.column_stack((y, d, x)), columns=["y", "d"] + ["x" + str(i) for i in range(data_apo["x"].shape[1])]
)
- dml_data = dml.DoubleMLData(df_apo, 'y', 'd')
+ dml_data = dml.DoubleMLData(df_apo, "y", "d")
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
np.random.seed(3141)
- dml_obj = dml.DoubleMLAPO(dml_data,
- ml_g, ml_m,
- treatment_level=treatment_level,
- n_folds=n_folds,
- score='APO',
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_obj = dml.DoubleMLAPO(
+ dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=treatment_level,
+ n_folds=n_folds,
+ score="APO",
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_obj.set_sample_splitting(all_smpls=all_smpls)
dml_obj.fit()
np.random.seed(3141)
- res_manual = fit_apo(y, x, d,
- clone(learner[0]), clone(learner[1]),
- treatment_level=treatment_level,
- all_smpls=all_smpls,
- score='APO',
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ res_manual = fit_apo(
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
+ treatment_level=treatment_level,
+ all_smpls=all_smpls,
+ score="APO",
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
np.random.seed(3141)
# test with external nuisance predictions
- dml_obj_ext = dml.DoubleMLAPO(dml_data,
- ml_g, ml_m,
- treatment_level=treatment_level,
- n_folds=n_folds,
- score='APO',
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_obj_ext = dml.DoubleMLAPO(
+ dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=treatment_level,
+ n_folds=n_folds,
+ score="APO",
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_obj_ext.set_sample_splitting(all_smpls=all_smpls)
- prediction_dict = {'d': {'ml_g0': dml_obj.predictions['ml_g0'].reshape(-1, 1),
- 'ml_g1': dml_obj.predictions['ml_g1'].reshape(-1, 1),
- 'ml_m': dml_obj.predictions['ml_m'].reshape(-1, 1)}}
+ prediction_dict = {
+ "d": {
+ "ml_g0": dml_obj.predictions["ml_g0"].reshape(-1, 1),
+ "ml_g1": dml_obj.predictions["ml_g1"].reshape(-1, 1),
+ "ml_m": dml_obj.predictions["ml_m"].reshape(-1, 1),
+ }
+ }
dml_obj_ext.fit(external_predictions=prediction_dict)
- res_dict = {'coef': dml_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'coef_ext': dml_obj_ext.coef.item(),
- 'se': dml_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'se_ext': dml_obj_ext.se.item(),
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "coef_ext": dml_obj_ext.coef.item(),
+ "se": dml_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "se_ext": dml_obj_ext.se.item(),
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'],
- all_smpls,
- score='APO',
- bootstrap=bootstrap,
- n_rep_boot=n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_apo(
+ y,
+ d,
+ treatment_level,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ all_smpls,
+ score="APO",
+ bootstrap=bootstrap,
+ n_rep_boot=n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
np.random.seed(3141)
dml_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
- res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_obj_ext.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap] = dml_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap + "_ext"] = dml_obj_ext.boot_t_stat
# check if sensitivity score with rho=0 gives equal asymptotic standard deviation
dml_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_obj.sensitivity_params["se"]
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_apo(y, d,
- treatment_level,
- all_coef=dml_obj.all_coef,
- predictions=dml_obj.predictions,
- score='APO',
- n_rep=1)
+ res_dict["sensitivity_elements"] = dml_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_apo(
+ y, d, treatment_level, all_coef=dml_obj.all_coef, predictions=dml_obj.predictions, score="APO", n_rep=1
+ )
return res_dict
@pytest.mark.ci
def test_dml_apo_coef(dml_apo_fixture):
- assert math.isclose(dml_apo_fixture['coef'],
- dml_apo_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_apo_fixture['coef'],
- dml_apo_fixture['coef_ext'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_fixture["coef"], dml_apo_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_fixture["coef"], dml_apo_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_apo_se(dml_apo_fixture):
- assert math.isclose(dml_apo_fixture['se'],
- dml_apo_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_apo_fixture['se'],
- dml_apo_fixture['se_ext'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_fixture["se"], dml_apo_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_fixture["se"], dml_apo_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_apo_boot(dml_apo_fixture):
- for bootstrap in dml_apo_fixture['boot_methods']:
- assert np.allclose(dml_apo_fixture['boot_t_stat' + bootstrap],
- dml_apo_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apo_fixture['boot_t_stat' + bootstrap],
- dml_apo_fixture['boot_t_stat' + bootstrap + '_ext'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apo_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_apo_fixture["boot_t_stat" + bootstrap],
+ dml_apo_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
+ assert np.allclose(
+ dml_apo_fixture["boot_t_stat" + bootstrap],
+ dml_apo_fixture["boot_t_stat" + bootstrap + "_ext"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_apo_sensitivity_rho0(dml_apo_fixture):
- assert np.allclose(dml_apo_fixture['se'],
- dml_apo_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apo_fixture['se'],
- dml_apo_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_apo_fixture["se"], dml_apo_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_apo_fixture["se"], dml_apo_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_apo_sensitivity(dml_apo_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_apo_fixture['sensitivity_elements'][sensitivity_element],
- dml_apo_fixture['sensitivity_elements_manual'][sensitivity_element],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_apo_fixture["sensitivity_elements"][sensitivity_element],
+ dml_apo_fixture["sensitivity_elements_manual"][sensitivity_element],
+ rtol=1e-9,
+ atol=1e-4,
+ )
-@pytest.fixture(scope='module',
- params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
+@pytest.fixture(scope="module", params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param
@@ -217,12 +234,9 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
ml_g = RandomForestRegressor(n_estimators=10)
ml_m = RandomForestClassifier(n_estimators=10)
- dml_obj = dml.DoubleMLAPO(obj_dml_data,
- ml_m=ml_m,
- ml_g=ml_g,
- treatment_level=treatment_level,
- trimming_threshold=0.05,
- n_folds=5)
+ dml_obj = dml.DoubleMLAPO(
+ obj_dml_data, ml_m=ml_m, ml_g=ml_g, treatment_level=treatment_level, trimming_threshold=0.05, n_folds=5
+ )
dml_obj.fit()
# create a random basis
@@ -232,10 +246,10 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
assert isinstance(capo.confint(), pd.DataFrame)
assert capo.blp_model.cov_type == cov_type
- groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= -1.0,
- obj_dml_data.data['X1'] > 0.2]),
- columns=['Group 1', 'Group 2'])
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ groups_1 = pd.DataFrame(
+ np.column_stack([obj_dml_data.data["X1"] <= -1.0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
+ )
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gapo_1 = dml_obj.gapo(groups_1, cov_type=cov_type)
assert isinstance(gapo_1, dml.utils.blp.DoubleMLBLP)
@@ -245,7 +259,7 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n, p=[0.1, 0.9]))
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gapo_2 = dml_obj.gapo(groups_2, cov_type=cov_type)
assert isinstance(gapo_2, dml.utils.blp.DoubleMLBLP)
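Note: the scalar assertions in this file use `math.isclose` while the array assertions use `np.allclose`, and the two apply slightly different tolerance rules. A minimal sketch of the semantics, reusing the tolerances from these tests:
```python
import math

import numpy as np

a, b = 1.000000001, 1.0

# math.isclose is symmetric: passes if |a - b| <= max(rel_tol * max(|a|, |b|), abs_tol).
assert math.isclose(a, b, rel_tol=1e-9, abs_tol=1e-4)

# np.allclose works element-wise and is asymmetric: |a - b| <= atol + rtol * |b|.
assert np.allclose(np.array([a, a]), np.array([b, b]), rtol=1e-9, atol=1e-4)
```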
diff --git a/doubleml/irm/tests/test_apo_classifier.py b/doubleml/irm/tests/test_apo_classifier.py
index 8b7d8a9de..042f3fe84 100644
--- a/doubleml/irm/tests/test_apo_classifier.py
+++ b/doubleml/irm/tests/test_apo_classifier.py
@@ -1,42 +1,44 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_apo_manual import fit_apo, boot_apo
-
-
-@pytest.fixture(scope='module',
- params=[[LogisticRegression(solver='lbfgs', max_iter=250),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42)]])
+from ._utils_apo_manual import boot_apo, fit_apo
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LogisticRegression(solver="lbfgs", max_iter=250), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -54,64 +56,87 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw,
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
- dml_obj = dml.DoubleMLAPO(obj_dml_data,
- ml_g, ml_m,
- treatment_level=treatment_level,
- n_folds=n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ dml_obj = dml.DoubleMLAPO(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=treatment_level,
+ n_folds=n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_obj.set_sample_splitting(all_smpls=all_smpls)
dml_obj.fit()
np.random.seed(3141)
- res_manual = fit_apo(y, x, d,
- clone(learner[0]), clone(learner[1]),
- treatment_level,
- all_smpls, score,
- normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_apo(
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
+ treatment_level,
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_apo(
+ y,
+ d,
+ treatment_level,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_apo_coef(dml_apo_classifier_fixture):
- assert math.isclose(dml_apo_classifier_fixture['coef'],
- dml_apo_classifier_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_apo_classifier_fixture["coef"], dml_apo_classifier_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_apo_se(dml_apo_classifier_fixture):
- assert math.isclose(dml_apo_classifier_fixture['se'],
- dml_apo_classifier_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_classifier_fixture["se"], dml_apo_classifier_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_apo_boot(dml_apo_classifier_fixture):
- for bootstrap in dml_apo_classifier_fixture['boot_methods']:
- assert np.allclose(dml_apo_classifier_fixture['boot_t_stat' + bootstrap],
- dml_apo_classifier_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apo_classifier_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_apo_classifier_fixture["boot_t_stat" + bootstrap],
+ dml_apo_classifier_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
index 31fa6b447..7692177e4 100644
--- a/doubleml/irm/tests/test_apo_exceptions.py
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -1,19 +1,20 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import Lasso, LogisticRegression
from doubleml import DoubleMLAPO, DoubleMLData
-from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data, make_irm_data
-
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from doubleml.datasets import make_iivm_data, make_irm_data, make_irm_data_discrete_treatments
n = 100
data_apo = make_irm_data_discrete_treatments(n_obs=n)
-df_apo = pd.DataFrame(np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])])
+df_apo = pd.DataFrame(
+ np.column_stack((data_apo["y"], data_apo["d"], data_apo["x"])),
+ columns=["y", "d"] + ["x" + str(i) for i in range(data_apo["x"].shape[1])],
+)
-dml_data = DoubleMLData(df_apo, 'y', 'd')
+dml_data = DoubleMLData(df_apo, "y", "d")
ml_g = Lasso()
ml_m = LogisticRegression()
@@ -21,42 +22,44 @@
@pytest.mark.ci
def test_apo_exception_data():
- msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.'
+ msg = "The data must be of DoubleMLData or DoubleMLClusterData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLAPO(pd.DataFrame(), ml_g, ml_m, treatment_level=0)
- msg = 'Only one treatment variable is allowed. Got 2 treatment variables.'
+ msg = "Only one treatment variable is allowed. Got 2 treatment variables."
with pytest.raises(ValueError, match=msg):
- dml_data_multiple = DoubleMLData(df_apo, 'y', ['d', 'x1'])
+ dml_data_multiple = DoubleMLData(df_apo, "y", ["d", "x1"])
_ = DoubleMLAPO(dml_data_multiple, ml_g, ml_m, treatment_level=0)
dml_data_z = make_iivm_data()
- msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+ msg = r"Incompatible data. z have been set as instrumental variable\(s\)."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLAPO(dml_data_z, ml_g, ml_m, treatment_level=0)
- msg = 'The number of treated observations is less than 5. Number of treated observations: 0 for treatment level 1.1.'
+ msg = "The number of treated observations is less than 5. Number of treated observations: 0 for treatment level 1.1."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=1.1)
- msg = r'The proportion of observations with treatment level 42 is less than 5\%. Got 0.70\%.'
+ msg = r"The proportion of observations with treatment level 42 is less than 5\%. Got 0.70\%."
# test warning
with pytest.warns(UserWarning, match=msg):
data_apo_warn = make_irm_data_discrete_treatments(n_obs=1000)
- data_apo_warn['d'][0:7] = 42
+ data_apo_warn["d"][0:7] = 42
df_apo_warn = pd.DataFrame(
- np.column_stack((data_apo_warn['y'], data_apo_warn['d'], data_apo_warn['x'])),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo_warn['x'].shape[1])]
+ np.column_stack((data_apo_warn["y"], data_apo_warn["d"], data_apo_warn["x"])),
+ columns=["y", "d"] + ["x" + str(i) for i in range(data_apo_warn["x"].shape[1])],
)
- dml_data_warn = DoubleMLData(df_apo_warn, 'y', 'd')
+ dml_data_warn = DoubleMLData(df_apo_warn, "y", "d")
_ = DoubleMLAPO(dml_data_warn, ml_g, ml_m, treatment_level=42)
@pytest.mark.ci
def test_apo_exception_learner():
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not'
- ' binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not"
+ " binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
ml_g_classifier = LogisticRegression()
_ = DoubleMLAPO(dml_data, ml_g_classifier, ml_m, treatment_level=0)
@@ -64,27 +67,25 @@ def test_apo_exception_learner():
@pytest.mark.ci
def test_apo_exception_scores():
- msg = 'Invalid score MAR. Valid score APO.'
+ msg = "Invalid score MAR. Valid score APO."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score='MAR')
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score="MAR")
@pytest.mark.ci
def test_apo_exception_trimming_rule():
- msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+ msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule='discard')
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="discard")
# check the trimming_threshold exceptions
msg = "trimming_threshold has to be a float. Object of type passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- trimming_rule='truncate', trimming_threshold="0.1")
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold="0.1")
- msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
+ msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- trimming_rule='truncate', trimming_threshold=0.6)
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold=0.6)
@pytest.mark.ci
@@ -101,7 +102,7 @@ def test_apo_exception_weights():
_ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=1)
msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights={'d': [1, 2, 3]})
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights={"d": [1, 2, 3]})
# shape checks
msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
@@ -113,41 +114,78 @@ def test_apo_exception_weights():
msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.ones(1), 'weights_bar': np.ones(1)})
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights={"weights": np.ones(1), "weights_bar": np.ones(1)})
msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.ones((n, 2)), 'weights_bar': np.ones((n, 2))})
+ _ = DoubleMLAPO(
+ dml_data, ml_g, ml_m, treatment_level=0, weights={"weights": np.ones((n, 2)), "weights_bar": np.ones((n, 2))}
+ )
msg = rf"weights_bar must have shape \({n}, 1\). weights_bar of shape \({n}, 2\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.ones(n), 'weights_bar': np.ones((n, 2))})
+ _ = DoubleMLAPO(
+ dml_data, ml_g, ml_m, treatment_level=0, weights={"weights": np.ones(n), "weights_bar": np.ones((n, 2))}
+ )
# value checks
msg = "All weights values must be greater or equal 0."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights=-1*np.ones(n,))
+        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=-1 * np.ones(n))
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': -1*np.ones(n,), 'weights_bar': np.ones((n, 1))})
+        _ = DoubleMLAPO(
+            dml_data, ml_g, ml_m, treatment_level=0, weights={"weights": -1 * np.ones(n), "weights_bar": np.ones((n, 1))}
+        )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.ones(n,), 'weights_bar': -1*np.ones((n, 1))})
+        _ = DoubleMLAPO(
+            dml_data, ml_g, ml_m, treatment_level=0, weights={"weights": np.ones(n), "weights_bar": -1 * np.ones((n, 1))}
+        )
msg = "At least one weight must be non-zero."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights=np.zeros((dml_data.d.shape[0], )))
+ _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, weights=np.zeros((dml_data.d.shape[0],)))
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.zeros((dml_data.d.shape[0], )),
- 'weights_bar': np.ones((dml_data.d.shape[0], 1))})
+ _ = DoubleMLAPO(
+ dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=0,
+ weights={"weights": np.zeros((dml_data.d.shape[0],)), "weights_bar": np.ones((dml_data.d.shape[0], 1))},
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0,
- weights={'weights': np.ones((dml_data.d.shape[0], )),
- 'weights_bar': np.zeros((dml_data.d.shape[0], 1))})
+ _ = DoubleMLAPO(
+ dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=0,
+ weights={"weights": np.ones((dml_data.d.shape[0],)), "weights_bar": np.zeros((dml_data.d.shape[0], 1))},
+ )
@pytest.mark.ci
@@ -161,10 +199,7 @@ def test_apo_exception_capo_gapo():
ml_g = RandomForestRegressor(n_estimators=10)
ml_m = RandomForestClassifier(n_estimators=10)
- dml_obj = DoubleMLAPO(obj_dml_data,
- ml_m=ml_m,
- ml_g=ml_g,
- treatment_level=0)
+ dml_obj = DoubleMLAPO(obj_dml_data, ml_m=ml_m, ml_g=ml_g, treatment_level=0)
dml_obj.fit()
# create a random basis
@@ -172,10 +207,10 @@ def test_apo_exception_capo_gapo():
msg = "Invalid score APO_2. Valid score APO."
with pytest.raises(ValueError, match=msg):
- dml_obj._score = 'APO_2'
+ dml_obj._score = "APO_2"
_ = dml_obj.capo(random_basis)
# reset the score
- dml_obj._score = 'APO'
+ dml_obj._score = "APO"
msg = "Only implemented for one repetition. Number of repetitions is 2."
with pytest.raises(NotImplementedError, match=msg):
@@ -189,10 +224,11 @@ def test_apo_exception_capo_gapo():
_ = dml_obj.gapo(1)
groups_1 = pd.DataFrame(
- np.column_stack([obj_dml_data.data['X1'] > 0.2, np.ones_like(obj_dml_data.data['X1'])]),
- columns=['Group 1', 'Group 2']
+ np.column_stack([obj_dml_data.data["X1"] > 0.2, np.ones_like(obj_dml_data.data["X1"])]), columns=["Group 1", "Group 2"]
+ )
+ msg = (
+ r"Columns of groups must be of bool type or int type \(dummy coded\). Alternatively,"
+ " groups should only contain one column."
)
- msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). Alternatively,'
- ' groups should only contain one column.')
with pytest.raises(TypeError, match=msg):
_ = dml_obj.gapo(groups_1)
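Note: pytest applies the `match` argument of `pytest.raises` and `pytest.warns` via `re.search`, which is why the expected messages in this file escape parentheses and percent signs inside raw strings. A minimal, self-contained sketch:
```python
import pytest


def raise_value_error():
    raise ValueError("Incompatible data. z have been set as instrumental variable(s).")


def test_match_is_a_regex():
    # Unescaped parentheses would be parsed as a regex group and fail to match.
    with pytest.raises(ValueError, match=r"instrumental variable\(s\)\."):
        raise_value_error()
```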
diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py
index a3f77dea1..aa9b43f37 100644
--- a/doubleml/irm/tests/test_apo_external_predictions.py
+++ b/doubleml/irm/tests/test_apo_external_predictions.py
@@ -1,12 +1,13 @@
-import pytest
-import numpy as np
-import pandas as pd
import math
+import numpy as np
+import pandas as pd
+import pytest
from sklearn.linear_model import LinearRegression, LogisticRegression
+
from doubleml import DoubleMLAPO, DoubleMLData
from doubleml.datasets import make_irm_data_discrete_treatments
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
from ...tests._utils import draw_smpls
@@ -28,7 +29,6 @@ def set_ml_g_ext(request):
@pytest.fixture(scope="module")
def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
-
score = "APO"
treatment_level = 0
ext_predictions = {"d": {}}
@@ -37,12 +37,12 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
n_obs = 500
data_apo = make_irm_data_discrete_treatments(n_obs=n_obs)
df_apo = pd.DataFrame(
- np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])]
+ np.column_stack((data_apo["y"], data_apo["d"], data_apo["x"])),
+ columns=["y", "d"] + ["x" + str(i) for i in range(data_apo["x"].shape[1])],
)
- dml_data = DoubleMLData(df_apo, 'y', 'd')
- d = data_apo['d']
+ dml_data = DoubleMLData(df_apo, "y", "d")
+ d = data_apo["d"]
all_smpls = draw_smpls(n_obs, n_folds=5, n_rep=n_rep, groups=d)
kwargs = {
@@ -50,7 +50,7 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
"score": score,
"treatment_level": treatment_level,
"n_rep": n_rep,
- "draw_sample_splitting": False
+ "draw_sample_splitting": False,
}
dml_obj = DoubleMLAPO(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
@@ -78,10 +78,7 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
np.random.seed(3141)
dml_obj_ext.fit(external_predictions=ext_predictions)
- res_dict = {
- "coef_normal": dml_obj.coef[0],
- "coef_ext": dml_obj_ext.coef[0]
- }
+ res_dict = {"coef_normal": dml_obj.coef[0], "coef_ext": dml_obj_ext.coef[0]}
return res_dict
@@ -89,8 +86,5 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
@pytest.mark.ci
def test_doubleml_apo_ext_coef(doubleml_apo_ext_fixture):
assert math.isclose(
- doubleml_apo_ext_fixture["coef_normal"],
- doubleml_apo_ext_fixture["coef_ext"],
- rel_tol=1e-9,
- abs_tol=1e-4
+ doubleml_apo_ext_fixture["coef_normal"], doubleml_apo_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4
)
diff --git a/doubleml/irm/tests/test_apo_tune.py b/doubleml/irm/tests/test_apo_tune.py
index dacb569bc..b7081618c 100644
--- a/doubleml/irm/tests/test_apo_tune.py
+++ b/doubleml/irm/tests/test_apo_tune.py
@@ -1,64 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_apo_manual import fit_apo, boot_apo, tune_nuisance_apo
+from ._utils_apo_manual import boot_apo, fit_apo, tune_nuisance_apo
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression(random_state=42)])
+@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=['APO'])
+@pytest.fixture(scope="module", params=["APO"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-4, 2, 10)}
+ par_grid = {"C": np.logspace(-4, 2, 10)}
return par_grid
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_apo_tune_fixture(generate_data_irm, learner_g, learner_m, score, normalize_ipw, tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
treatment_level = 0
@@ -74,19 +67,21 @@ def dml_apo_tune_fixture(generate_data_irm, learner_g, learner_m, score, normali
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
- dml_obj = dml.DoubleMLAPO(obj_dml_data,
- ml_g, ml_m,
- treatment_level=treatment_level,
- n_folds=n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ dml_obj = dml.DoubleMLAPO(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ treatment_level=treatment_level,
+ n_folds=n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_obj.set_sample_splitting(all_smpls=all_smpls)
np.random.seed(3141)
# tune hyperparameters
- tune_res = dml_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLAPO)
dml_obj.fit()
@@ -95,65 +90,103 @@ def dml_apo_tune_fixture(generate_data_irm, learner_g, learner_m, score, normali
smpls = all_smpls[0]
if tune_on_folds:
- g0_params, g1_params, m_params = tune_nuisance_apo(y, x, d, treatment_level,
- clone(learner_g), clone(learner_m), smpls, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_apo(
+ y,
+ x,
+ d,
+ treatment_level,
+ clone(learner_g),
+ clone(learner_m),
+ smpls,
+ score,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g0_params, g1_params, m_params = tune_nuisance_apo(y, x, d, treatment_level,
- clone(learner_g), clone(learner_m), xx, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_apo(
+ y,
+ x,
+ d,
+ treatment_level,
+ clone(learner_g),
+ clone(learner_m),
+ xx,
+ score,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
g0_params = g0_params * n_folds
m_params = m_params * n_folds
g1_params = g1_params * n_folds
- res_manual = fit_apo(y, x, d, clone(learner_g), clone(learner_m),
- treatment_level,
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params)
-
- res_dict = {'coef': dml_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_apo(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ treatment_level,
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_apo(y, d, treatment_level, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_apo(
+ y,
+ d,
+ treatment_level,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_apo_tune_coef(dml_apo_tune_fixture):
- assert math.isclose(dml_apo_tune_fixture['coef'],
- dml_apo_tune_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_tune_fixture["coef"], dml_apo_tune_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_apo_tune_se(dml_apo_tune_fixture):
- assert math.isclose(dml_apo_tune_fixture['se'],
- dml_apo_tune_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_apo_tune_fixture["se"], dml_apo_tune_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_apo_tune_boot(dml_apo_tune_fixture):
- for bootstrap in dml_apo_tune_fixture['boot_methods']:
- assert np.allclose(dml_apo_tune_fixture['boot_t_stat' + bootstrap],
- dml_apo_tune_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apo_tune_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_apo_tune_fixture["boot_t_stat" + bootstrap],
+ dml_apo_tune_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
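Note: the module-scoped parametrized fixtures used here fan out into a test grid: a test that requests several of them runs once per parameter combination, and each fixture value is cached for the module. A minimal sketch with two of the fixtures from this file:
```python
import pytest


@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
    return request.param


@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
    return request.param


# Collected four times, once per (normalize_ipw, tune_on_folds) combination.
def test_grid(normalize_ipw, tune_on_folds):
    assert isinstance(normalize_ipw, bool)
    assert isinstance(tune_on_folds, bool)
```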
diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py
index 5551e5dd0..63687ebdc 100644
--- a/doubleml/irm/tests/test_apo_weighted_scores.py
+++ b/doubleml/irm/tests/test_apo_weighted_scores.py
@@ -1,56 +1,55 @@
-import pytest
import numpy as np
-
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
-from ...tests._utils import draw_smpls
import doubleml as dml
+from ...tests._utils import draw_smpls
+
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['APO'])
+@pytest.fixture(scope="module", params=["APO"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment_level(request):
return request.param
-@pytest.fixture(scope='module')
-def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold,
- treatment_level):
+@pytest.fixture(scope="module")
+def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_level):
n_folds = 2
# collect data
@@ -85,36 +84,36 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normali
np.random.seed(42)
weights_dict = {
- 'weights': weights,
- 'weights_bar': np.tile(weights[:, np.newaxis], (1, n_rep)),
+ "weights": weights,
+ "weights_bar": np.tile(weights[:, np.newaxis], (1, n_rep)),
}
dml_obj_weighted_dict = dml.DoubleMLAPO(weights=weights_dict, **input_args)
dml_obj_weighted_dict.set_sample_splitting(all_smpls=all_smpls)
dml_obj_weighted_dict.fit()
result_dict = {
- 'coef': dml_obj.coef,
- 'weighted_coef': dml_obj_weighted.coef,
- 'weighted_coef_dict': dml_obj_weighted_dict.coef,
- 'default_weights': dml_obj.weights,
+ "coef": dml_obj.coef,
+ "weighted_coef": dml_obj_weighted.coef,
+ "weighted_coef_dict": dml_obj_weighted_dict.coef,
+ "default_weights": dml_obj.weights,
}
return result_dict
@pytest.mark.ci
def test_apo_weighted_coef(weighted_apo_score_fixture):
- assert np.allclose(0.5 * weighted_apo_score_fixture['coef'],
- weighted_apo_score_fixture['weighted_coef'])
- assert np.allclose(0.5 * weighted_apo_score_fixture['coef'],
- weighted_apo_score_fixture['weighted_coef_dict'])
+ assert np.allclose(0.5 * weighted_apo_score_fixture["coef"], weighted_apo_score_fixture["weighted_coef"])
+ assert np.allclose(0.5 * weighted_apo_score_fixture["coef"], weighted_apo_score_fixture["weighted_coef_dict"])
@pytest.mark.ci
def test_apo_default_weights(weighted_apo_score_fixture):
- assert isinstance(weighted_apo_score_fixture['default_weights'], dict)
+ assert isinstance(weighted_apo_score_fixture["default_weights"], dict)
- expected_keys = {'weights'}
- assert set(weighted_apo_score_fixture['default_weights'].keys()) == expected_keys
+ expected_keys = {"weights"}
+ assert set(weighted_apo_score_fixture["default_weights"].keys()) == expected_keys
- assert np.allclose(weighted_apo_score_fixture['default_weights']['weights'],
- np.ones_like(weighted_apo_score_fixture['default_weights']['weights']))
+ assert np.allclose(
+ weighted_apo_score_fixture["default_weights"]["weights"],
+ np.ones_like(weighted_apo_score_fixture["default_weights"]["weights"]),
+ )
diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py
index 92a372ff1..746cb63c9 100644
--- a/doubleml/irm/tests/test_apos.py
+++ b/doubleml/irm/tests/test_apos.py
@@ -1,17 +1,15 @@
import numpy as np
import pandas as pd
import pytest
-
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
-from doubleml.datasets import make_irm_data_discrete_treatments, make_irm_data
+from doubleml.datasets import make_irm_data, make_irm_data_discrete_treatments
-from ._utils_apos_manual import fit_apos, boot_apos
from ...tests._utils import confint_manual
+from ._utils_apos_manual import boot_apos, fit_apos
@pytest.mark.ci
@@ -21,10 +19,12 @@ def test_apo_properties():
np.random.seed(42)
obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
- dml_obj = dml.DoubleMLAPOS(obj_dml_data,
- ml_g=RandomForestRegressor(n_estimators=10),
- ml_m=RandomForestClassifier(n_estimators=10),
- treatment_levels=0)
+ dml_obj = dml.DoubleMLAPOS(
+ obj_dml_data,
+ ml_g=RandomForestRegressor(n_estimators=10),
+ ml_m=RandomForestClassifier(n_estimators=10),
+ treatment_levels=0,
+ )
# check properties before fit
assert dml_obj.n_rep_boot is None
@@ -65,70 +65,68 @@ def test_apo_properties():
assert dml_obj.sensitivity_params is not None
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250, random_state=42)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250, random_state=42)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[[0, 1, 2], [0]])
+@pytest.fixture(scope="module", params=[[0, 1, 2], [0]])
def treatment_levels(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
np.random.seed(3141)
n_obs = 500
data = make_irm_data_discrete_treatments(n_obs=n_obs)
- y = data['y']
- x = data['x']
- d = data['d']
- df = pd.DataFrame(
- np.column_stack((y, d, x)),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
- )
+ y = data["y"]
+ x = data["x"]
+ d = data["d"]
+ df = pd.DataFrame(np.column_stack((y, d, x)), columns=["y", "d"] + ["x" + str(i) for i in range(data["x"].shape[1])])
- dml_data = dml.DoubleMLData(df, 'y', 'd')
+ dml_data = dml.DoubleMLData(df, "y", "d")
input_args = {
- 'obj_dml_data': dml_data,
- 'ml_g': clone(learner[0]),
- 'ml_m': clone(learner[1]),
+ "obj_dml_data": dml_data,
+ "ml_g": clone(learner[0]),
+ "ml_m": clone(learner[1]),
"treatment_levels": treatment_levels,
"n_folds": n_folds,
"n_rep": n_rep,
- "score": 'APO',
+ "score": "APO",
"normalize_ipw": normalize_ipw,
- "trimming_rule": 'truncate',
+ "trimming_rule": "truncate",
"trimming_threshold": trimming_threshold,
- }
+ }
unfitted_apos_model = dml.DoubleMLAPOS(**input_args)
np.random.seed(42)
@@ -144,147 +142,138 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen
np.random.seed(42)
res_manual = fit_apos(
- y, x, d,
- clone(learner[0]), clone(learner[1]),
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
treatment_levels=treatment_levels,
all_smpls=all_smpls,
n_rep=n_rep,
- score='APO',
- trimming_rule='truncate',
+ score="APO",
+ trimming_rule="truncate",
normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ trimming_threshold=trimming_threshold,
+ )
ci = dml_obj.confint(joint=False, level=0.95)
ci_ext_smpls = dml_obj_ext_smpls.confint(joint=False, level=0.95)
ci_manual = confint_manual(
- res_manual['apos'], res_manual['se'], treatment_levels,
- boot_t_stat=None, joint=False, level=0.95
- )
+ res_manual["apos"], res_manual["se"], treatment_levels, boot_t_stat=None, joint=False, level=0.95
+ )
res_dict = {
- 'coef': dml_obj.coef,
- 'coef_ext_smpls': dml_obj_ext_smpls.coef,
- 'coef_manual': res_manual['apos'],
- 'se': dml_obj.se,
- 'se_ext_smpls': dml_obj_ext_smpls.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods,
- 'n_treatment_levels': len(treatment_levels),
- 'n_rep': n_rep,
- 'ci': ci.to_numpy(),
- 'ci_ext_smpls': ci_ext_smpls.to_numpy(),
- 'ci_manual': ci_manual.to_numpy(),
- 'apos_model': dml_obj,
- 'unfitted_apos_model': unfitted_apos_model
+ "coef": dml_obj.coef,
+ "coef_ext_smpls": dml_obj_ext_smpls.coef,
+ "coef_manual": res_manual["apos"],
+ "se": dml_obj.se,
+ "se_ext_smpls": dml_obj_ext_smpls.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ "n_treatment_levels": len(treatment_levels),
+ "n_rep": n_rep,
+ "ci": ci.to_numpy(),
+ "ci_ext_smpls": ci_ext_smpls.to_numpy(),
+ "ci_manual": ci_manual.to_numpy(),
+ "apos_model": dml_obj,
+ "unfitted_apos_model": unfitted_apos_model,
}
if n_rep == 1:
for bootstrap in boot_methods:
np.random.seed(42)
- boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels,
- all_smpls, n_rep, bootstrap, n_rep_boot)
+ boot_t_stat = boot_apos(
+ res_manual["apo_scaled_score"], res_manual["all_se"], treatment_levels, all_smpls, n_rep, bootstrap, n_rep_boot
+ )
np.random.seed(42)
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat
- res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap] = dml_obj.boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap + "_manual"] = boot_t_stat
ci = dml_obj.confint(joint=True, level=0.95)
ci_manual = confint_manual(
- res_manual['apos'], res_manual['se'], treatment_levels,
- boot_t_stat=boot_t_stat, joint=True, level=0.95)
- res_dict['boot_ci_' + bootstrap] = ci.to_numpy()
- res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy()
+ res_manual["apos"], res_manual["se"], treatment_levels, boot_t_stat=boot_t_stat, joint=True, level=0.95
+ )
+ res_dict["boot_ci_" + bootstrap] = ci.to_numpy()
+ res_dict["boot_ci_" + bootstrap + "_manual"] = ci_manual.to_numpy()
# causal contrasts
if len(treatment_levels) > 1:
acc_single = dml_obj.causal_contrast(reference_levels=[treatment_levels[0]])
- res_dict['causal_contrast_single'] = acc_single
+ res_dict["causal_contrast_single"] = acc_single
acc_multiple = dml_obj.causal_contrast(reference_levels=treatment_levels)
- res_dict['causal_contrast_multiple'] = acc_multiple
+ res_dict["causal_contrast_multiple"] = acc_multiple
return res_dict
@pytest.mark.ci
def test_dml_apos_coef(dml_apos_fixture):
- assert np.allclose(dml_apos_fixture['coef'],
- dml_apos_fixture['coef_manual'],
- rtol=1e-9, atol=1e-9)
- assert np.allclose(dml_apos_fixture['coef'],
- dml_apos_fixture['coef_ext_smpls'],
- rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_fixture["coef"], dml_apos_fixture["coef_manual"], rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_fixture["coef"], dml_apos_fixture["coef_ext_smpls"], rtol=1e-9, atol=1e-9)
@pytest.mark.ci
def test_dml_apos_se(dml_apos_fixture):
- if dml_apos_fixture['n_rep'] != 1:
+ if dml_apos_fixture["n_rep"] != 1:
pytest.skip("Skipping test as n_rep is not 1")
- assert np.allclose(dml_apos_fixture['se'],
- dml_apos_fixture['se_manual'],
- rtol=1e-9, atol=1e-9)
- assert np.allclose(dml_apos_fixture['se'],
- dml_apos_fixture['se_ext_smpls'],
- rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_fixture["se"], dml_apos_fixture["se_manual"], rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_fixture["se"], dml_apos_fixture["se_ext_smpls"], rtol=1e-9, atol=1e-9)
@pytest.mark.ci
def test_dml_apos_boot(dml_apos_fixture):
- if dml_apos_fixture['n_rep'] != 1:
+ if dml_apos_fixture["n_rep"] != 1:
pytest.skip("Skipping test as n_rep is not 1")
- for bootstrap in dml_apos_fixture['boot_methods']:
- assert np.allclose(dml_apos_fixture['boot_t_stat_' + bootstrap],
- dml_apos_fixture['boot_t_stat_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apos_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_apos_fixture["boot_t_stat_" + bootstrap],
+ dml_apos_fixture["boot_t_stat_" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_apos_ci(dml_apos_fixture):
- if dml_apos_fixture['n_rep'] != 1:
+ if dml_apos_fixture["n_rep"] != 1:
pytest.skip("Skipping test as n_rep is not 1")
- for bootstrap in dml_apos_fixture['boot_methods']:
- assert np.allclose(dml_apos_fixture['ci'],
- dml_apos_fixture['ci_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apos_fixture['ci'],
- dml_apos_fixture['ci_ext_smpls'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apos_fixture['boot_ci_' + bootstrap],
- dml_apos_fixture['boot_ci_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apos_fixture["boot_methods"]:
+ assert np.allclose(dml_apos_fixture["ci"], dml_apos_fixture["ci_manual"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_apos_fixture["ci"], dml_apos_fixture["ci_ext_smpls"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_apos_fixture["boot_ci_" + bootstrap],
+ dml_apos_fixture["boot_ci_" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_doubleml_apos_return_types(dml_apos_fixture):
- assert isinstance(dml_apos_fixture['apos_model'].__str__(), str)
- assert isinstance(dml_apos_fixture['apos_model'].summary, pd.DataFrame)
+ assert isinstance(dml_apos_fixture["apos_model"].__str__(), str)
+ assert isinstance(dml_apos_fixture["apos_model"].summary, pd.DataFrame)
- assert dml_apos_fixture['apos_model'].all_coef.shape == (
- dml_apos_fixture['n_treatment_levels'],
- dml_apos_fixture['n_rep']
- )
- assert isinstance(dml_apos_fixture['unfitted_apos_model'].summary, pd.DataFrame)
- if dml_apos_fixture['n_treatment_levels'] > 1:
- assert isinstance(dml_apos_fixture['causal_contrast_single'], dml.DoubleMLFramework)
- assert isinstance(dml_apos_fixture['causal_contrast_multiple'], dml.DoubleMLFramework)
+ assert dml_apos_fixture["apos_model"].all_coef.shape == (dml_apos_fixture["n_treatment_levels"], dml_apos_fixture["n_rep"])
+ assert isinstance(dml_apos_fixture["unfitted_apos_model"].summary, pd.DataFrame)
+ if dml_apos_fixture["n_treatment_levels"] > 1:
+ assert isinstance(dml_apos_fixture["causal_contrast_single"], dml.DoubleMLFramework)
+ assert isinstance(dml_apos_fixture["causal_contrast_multiple"], dml.DoubleMLFramework)
- benchmark = dml_apos_fixture['apos_model'].sensitivity_benchmark(benchmarking_set=['x1'])
+ benchmark = dml_apos_fixture["apos_model"].sensitivity_benchmark(benchmarking_set=["x1"])
assert isinstance(benchmark, pd.DataFrame)
@pytest.mark.ci
def test_doubleml_apos_causal_contrast(dml_apos_fixture):
- if dml_apos_fixture['n_treatment_levels'] == 1:
+ if dml_apos_fixture["n_treatment_levels"] == 1:
pytest.skip("Skipping test as n_treatment_levels is 1")
- acc_single = dml_apos_fixture['apos_model'].all_coef[1:, ] - dml_apos_fixture['apos_model'].all_coef[0, ]
- assert np.allclose(dml_apos_fixture['causal_contrast_single'].all_thetas,
- acc_single,
- rtol=1e-9, atol=1e-9)
-
- acc_multiple = np.append(acc_single,
- dml_apos_fixture['apos_model'].all_coef[2:3, ] - dml_apos_fixture['apos_model'].all_coef[1:2, ],
- axis=0)
- assert np.allclose(dml_apos_fixture['causal_contrast_multiple'].all_thetas,
- acc_multiple,
- rtol=1e-9, atol=1e-9)
+ acc_single = dml_apos_fixture["apos_model"].all_coef[1:,] - dml_apos_fixture["apos_model"].all_coef[0,]
+ assert np.allclose(dml_apos_fixture["causal_contrast_single"].all_thetas, acc_single, rtol=1e-9, atol=1e-9)
+
+ acc_multiple = np.append(
+ acc_single, dml_apos_fixture["apos_model"].all_coef[2:3,] - dml_apos_fixture["apos_model"].all_coef[1:2,], axis=0
+ )
+ assert np.allclose(dml_apos_fixture["causal_contrast_multiple"].all_thetas, acc_multiple, rtol=1e-9, atol=1e-9)
diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py
index 9c3e7d351..06fdc3085 100644
--- a/doubleml/irm/tests/test_apos_classfier.py
+++ b/doubleml/irm/tests/test_apos_classfier.py
@@ -1,55 +1,54 @@
import numpy as np
import pandas as pd
import pytest
-
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from doubleml.datasets import make_irm_data_discrete_treatments
-from ._utils_apos_manual import fit_apos, boot_apos
from ...tests._utils import confint_manual
-
-
-@pytest.fixture(scope='module',
- params=[[LogisticRegression(solver='lbfgs', max_iter=250),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42)]])
+from ._utils_apos_manual import boot_apos, fit_apos
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LogisticRegression(solver="lbfgs", max_iter=250), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1])
+@pytest.fixture(scope="module", params=[1])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[[0, 1, 2], [0]])
+@pytest.fixture(scope="module", params=[[0, 1, 2], [0]])
def treatment_levels(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -57,27 +56,24 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol
n_obs = 500
data = make_irm_data_discrete_treatments(n_obs=n_obs)
y = np.random.binomial(1, 0.5, n_obs)
- x = data['x']
- d = data['d']
- df = pd.DataFrame(
- np.column_stack((y, d, x)),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
- )
+ x = data["x"]
+ d = data["d"]
+ df = pd.DataFrame(np.column_stack((y, d, x)), columns=["y", "d"] + ["x" + str(i) for i in range(data["x"].shape[1])])
- dml_data = dml.DoubleMLData(df, 'y', 'd')
+ dml_data = dml.DoubleMLData(df, "y", "d")
input_args = {
- 'obj_dml_data': dml_data,
- 'ml_g': clone(learner[0]),
- 'ml_m': clone(learner[1]),
+ "obj_dml_data": dml_data,
+ "ml_g": clone(learner[0]),
+ "ml_m": clone(learner[1]),
"treatment_levels": treatment_levels,
"n_folds": n_folds,
"n_rep": n_rep,
- "score": 'APO',
+ "score": "APO",
"normalize_ipw": normalize_ipw,
- "trimming_rule": 'truncate',
+ "trimming_rule": "truncate",
"trimming_threshold": trimming_threshold,
- }
+ }
unfitted_apos_model = dml.DoubleMLAPOS(**input_args)
np.random.seed(42)
@@ -93,108 +89,111 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol
np.random.seed(42)
res_manual = fit_apos(
- y, x, d,
- clone(learner[0]), clone(learner[1]),
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
treatment_levels=treatment_levels,
all_smpls=all_smpls,
- score='APO',
- trimming_rule='truncate',
+ score="APO",
+ trimming_rule="truncate",
normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ trimming_threshold=trimming_threshold,
+ )
ci = dml_obj.confint(joint=False, level=0.95)
ci_ext_smpls = dml_obj_ext_smpls.confint(joint=False, level=0.95)
ci_manual = confint_manual(
- res_manual['apos'], res_manual['se'], treatment_levels,
- boot_t_stat=None, joint=False, level=0.95
- )
+ res_manual["apos"], res_manual["se"], treatment_levels, boot_t_stat=None, joint=False, level=0.95
+ )
res_dict = {
- 'coef': dml_obj.coef,
- 'coef_ext_smpls': dml_obj_ext_smpls.coef,
- 'coef_manual': res_manual['apos'],
- 'se': dml_obj.se,
- 'se_ext_smpls': dml_obj_ext_smpls.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods,
- 'n_treatment_levels': len(treatment_levels),
- 'n_rep': n_rep,
- 'ci': ci.to_numpy(),
- 'ci_ext_smpls': ci_ext_smpls.to_numpy(),
- 'ci_manual': ci_manual.to_numpy(),
- 'apos_model': dml_obj,
- 'unfitted_apos_model': unfitted_apos_model
+ "coef": dml_obj.coef,
+ "coef_ext_smpls": dml_obj_ext_smpls.coef,
+ "coef_manual": res_manual["apos"],
+ "se": dml_obj.se,
+ "se_ext_smpls": dml_obj_ext_smpls.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ "n_treatment_levels": len(treatment_levels),
+ "n_rep": n_rep,
+ "ci": ci.to_numpy(),
+ "ci_ext_smpls": ci_ext_smpls.to_numpy(),
+ "ci_manual": ci_manual.to_numpy(),
+ "apos_model": dml_obj,
+ "unfitted_apos_model": unfitted_apos_model,
}
for bootstrap in boot_methods:
np.random.seed(42)
- boot_t_stat = boot_apos(res_manual['apo_scaled_score'], res_manual['all_se'], treatment_levels,
- all_smpls, n_rep, bootstrap, n_rep_boot)
+ boot_t_stat = boot_apos(
+ res_manual["apo_scaled_score"], res_manual["all_se"], treatment_levels, all_smpls, n_rep, bootstrap, n_rep_boot
+ )
np.random.seed(42)
dml_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat_' + bootstrap] = dml_obj.boot_t_stat
- res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap] = dml_obj.boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap + "_manual"] = boot_t_stat
ci = dml_obj.confint(joint=True, level=0.95)
- ci_manual = confint_manual(res_manual['apos'], res_manual['se'], treatment_levels,
- boot_t_stat=boot_t_stat, joint=True, level=0.95)
- res_dict['boot_ci_' + bootstrap] = ci.to_numpy()
- res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy()
+ ci_manual = confint_manual(
+ res_manual["apos"], res_manual["se"], treatment_levels, boot_t_stat=boot_t_stat, joint=True, level=0.95
+ )
+ res_dict["boot_ci_" + bootstrap] = ci.to_numpy()
+ res_dict["boot_ci_" + bootstrap + "_manual"] = ci_manual.to_numpy()
return res_dict
@pytest.mark.ci
def test_dml_apos_coef(dml_apos_classifier_fixture):
- assert np.allclose(dml_apos_classifier_fixture['coef'],
- dml_apos_classifier_fixture['coef_manual'],
- rtol=1e-9, atol=1e-9)
- assert np.allclose(dml_apos_classifier_fixture['coef'],
- dml_apos_classifier_fixture['coef_ext_smpls'],
- rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_classifier_fixture["coef"], dml_apos_classifier_fixture["coef_manual"], rtol=1e-9, atol=1e-9)
+ assert np.allclose(
+ dml_apos_classifier_fixture["coef"], dml_apos_classifier_fixture["coef_ext_smpls"], rtol=1e-9, atol=1e-9
+ )
@pytest.mark.ci
def test_dml_apos_se(dml_apos_classifier_fixture):
- assert np.allclose(dml_apos_classifier_fixture['se'],
- dml_apos_classifier_fixture['se_manual'],
- rtol=1e-9, atol=1e-9)
- assert np.allclose(dml_apos_classifier_fixture['se'],
- dml_apos_classifier_fixture['se_ext_smpls'],
- rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_classifier_fixture["se"], dml_apos_classifier_fixture["se_manual"], rtol=1e-9, atol=1e-9)
+ assert np.allclose(dml_apos_classifier_fixture["se"], dml_apos_classifier_fixture["se_ext_smpls"], rtol=1e-9, atol=1e-9)
@pytest.mark.ci
def test_dml_apos_boot(dml_apos_classifier_fixture):
- for bootstrap in dml_apos_classifier_fixture['boot_methods']:
- assert np.allclose(dml_apos_classifier_fixture['boot_t_stat_' + bootstrap],
- dml_apos_classifier_fixture['boot_t_stat_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apos_classifier_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_apos_classifier_fixture["boot_t_stat_" + bootstrap],
+ dml_apos_classifier_fixture["boot_t_stat_" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_apos_ci(dml_apos_classifier_fixture):
- for bootstrap in dml_apos_classifier_fixture['boot_methods']:
- assert np.allclose(dml_apos_classifier_fixture['ci'],
- dml_apos_classifier_fixture['ci_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apos_classifier_fixture['ci'],
- dml_apos_classifier_fixture['ci_ext_smpls'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_apos_classifier_fixture['boot_ci_' + bootstrap],
- dml_apos_classifier_fixture['boot_ci_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_apos_classifier_fixture["boot_methods"]:
+ assert np.allclose(dml_apos_classifier_fixture["ci"], dml_apos_classifier_fixture["ci_manual"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_apos_classifier_fixture["ci"], dml_apos_classifier_fixture["ci_ext_smpls"], rtol=1e-9, atol=1e-4
+ )
+ assert np.allclose(
+ dml_apos_classifier_fixture["boot_ci_" + bootstrap],
+ dml_apos_classifier_fixture["boot_ci_" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_doubleml_apos_return_types(dml_apos_classifier_fixture):
- assert isinstance(dml_apos_classifier_fixture['apos_model'].__str__(), str)
- assert isinstance(dml_apos_classifier_fixture['apos_model'].summary, pd.DataFrame)
+ assert isinstance(dml_apos_classifier_fixture["apos_model"].__str__(), str)
+ assert isinstance(dml_apos_classifier_fixture["apos_model"].summary, pd.DataFrame)
- assert dml_apos_classifier_fixture['apos_model'].all_coef.shape == (
- dml_apos_classifier_fixture['n_treatment_levels'],
- dml_apos_classifier_fixture['n_rep']
+ assert dml_apos_classifier_fixture["apos_model"].all_coef.shape == (
+ dml_apos_classifier_fixture["n_treatment_levels"],
+ dml_apos_classifier_fixture["n_rep"],
)
- assert isinstance(dml_apos_classifier_fixture['unfitted_apos_model'].summary, pd.DataFrame)
+ assert isinstance(dml_apos_classifier_fixture["unfitted_apos_model"].summary, pd.DataFrame)
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 0c20efe53..8e9a0b8a6 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -1,20 +1,18 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
+from sklearn.linear_model import Lasso, LogisticRegression
from doubleml import DoubleMLAPOS, DoubleMLData
-from doubleml.datasets import make_irm_data_discrete_treatments, make_iivm_data
-
-from sklearn.linear_model import Lasso, LogisticRegression
+from doubleml.datasets import make_iivm_data, make_irm_data_discrete_treatments
n = 100
data = make_irm_data_discrete_treatments(n_obs=n)
df = pd.DataFrame(
- np.column_stack((data['y'], data['d'], data['x'])),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
+ np.column_stack((data["y"], data["d"], data["x"])), columns=["y", "d"] + ["x" + str(i) for i in range(data["x"].shape[1])]
)
-dml_data = DoubleMLData(df, 'y', 'd')
+dml_data = DoubleMLData(df, "y", "d")
ml_g = Lasso()
ml_m = LogisticRegression()
@@ -22,17 +20,19 @@
@pytest.mark.ci
def test_apos_exception_data():
- msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.'
+ msg = "The data must be of DoubleMLData or DoubleMLClusterData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLAPOS(pd.DataFrame(), ml_g, ml_m, treatment_levels=0)
- msg = 'The data must not contain instrumental variables.'
+ msg = "The data must not contain instrumental variables."
with pytest.raises(ValueError, match=msg):
dml_data_z = make_iivm_data()
_ = DoubleMLAPOS(dml_data_z, ml_g, ml_m, treatment_levels=0)
- msg = ('Invalid reference_levels. reference_levels has to be an iterable subset or '
- 'a single element of the unique treatment levels in the data.')
+ msg = (
+ "Invalid reference_levels. reference_levels has to be an iterable subset or "
+ "a single element of the unique treatment levels in the data."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[1.1])
with pytest.raises(ValueError, match=msg):
@@ -43,8 +43,10 @@ def test_apos_exception_data():
@pytest.mark.ci
def test_apos_exception_learner():
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not'
- ' binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not"
+ " binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
ml_g_classifier = LogisticRegression()
_ = DoubleMLAPOS(dml_data, ml_g_classifier, ml_m, treatment_levels=0)
@@ -52,27 +54,25 @@ def test_apos_exception_learner():
@pytest.mark.ci
def test_apos_exception_scores():
- msg = 'Invalid score MAR. Valid score APO.'
+ msg = "Invalid score MAR. Valid score APO."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score='MAR')
+ _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score="MAR")
@pytest.mark.ci
def test_apos_exception_trimming_rule():
- msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+ msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule='discard')
+ _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="discard")
# check the trimming_threshold exceptions
msg = "trimming_threshold has to be a float. Object of type passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0,
- trimming_rule='truncate', trimming_threshold="0.1")
+ _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold="0.1")
- msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
+ msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0,
- trimming_rule='truncate', trimming_threshold=0.6)
+ _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold=0.6)
@pytest.mark.ci
@@ -86,25 +86,25 @@ def test_apos_exception_ipw_normalization():
def test_apos_exception_properties_and_methods():
# properties
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, draw_sample_splitting=False)
- msg = r'Sample splitting not specified. Draw samples via .draw_sample_splitting\(\). External samples not implemented yet.'
+ msg = r"Sample splitting not specified. Draw samples via .draw_sample_splitting\(\). External samples not implemented yet."
with pytest.raises(ValueError, match=msg):
_ = dml_obj.smpls
# methods
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0)
- msg = r'Apply fit\(\) before confint\(\).'
+ msg = r"Apply fit\(\) before confint\(\)."
with pytest.raises(ValueError, match=msg):
dml_obj.confint()
- msg = r'Apply fit\(\) before bootstrap\(\).'
+ msg = r"Apply fit\(\) before bootstrap\(\)."
with pytest.raises(ValueError, match=msg):
dml_obj.bootstrap()
- msg = r'Apply fit\(\) before sensitivity_analysis\(\).'
+ msg = r"Apply fit\(\) before sensitivity_analysis\(\)."
with pytest.raises(ValueError, match=msg):
dml_obj.sensitivity_analysis()
- msg = r'Apply fit\(\) before sensitivity_plot\(\).'
+ msg = r"Apply fit\(\) before sensitivity_plot\(\)."
with pytest.raises(ValueError, match=msg):
dml_obj.sensitivity_plot()
- msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.'
+ msg = r"Apply sensitivity_analysis\(\) before sensitivity_summary."
with pytest.raises(ValueError, match=msg):
_ = dml_obj.sensitivity_summary
@@ -113,15 +113,12 @@ def test_apos_exception_properties_and_methods():
def test_apos_exception_ext_pred():
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0)
external_predictions = [0, 1]
- msg = r'external_predictions must be a dictionary. Object of type <class \'list\'> passed.'
+ msg = r"external_predictions must be a dictionary. Object of type <class 'list'> passed."
with pytest.raises(TypeError, match=msg):
dml_obj.fit(external_predictions=external_predictions)
# test with a level subset
- external_predictions = {
- 0: "dummy",
- 1: "dummy"
- }
+ external_predictions = {0: "dummy", 1: "dummy"}
msg = (
r"external_predictions must be a subset of all treatment levels\. "
r"Expected keys: \{0\}\. "
@@ -133,24 +130,18 @@ def test_apos_exception_ext_pred():
external_predictions = {
0: "dummy",
}
- msg = r'external_predictions\[0\] must be a dictionary. Object of type <class \'str\'> passed.'
+ msg = r"external_predictions\[0\] must be a dictionary. Object of type <class 'str'> passed."
with pytest.raises(TypeError, match=msg):
dml_obj.fit(external_predictions=external_predictions)
- external_predictions = {
- 0: {"ml_g": "dummy"}
- }
+ external_predictions = {0: {"ml_g": "dummy"}}
msg = r"external_predictions\[0\] must be a subset of \{.*\}. Passed keys: \{'ml_g'\}\."
with pytest.raises(ValueError, match=msg):
dml_obj.fit(external_predictions=external_predictions)
# test with all levels
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1, 2, 3])
- external_predictions = {
- 0: "dummy",
- 1: "dummy",
- 4: "dummy"
- }
+ external_predictions = {0: "dummy", 1: "dummy", 4: "dummy"}
msg = (
r"external_predictions must be a subset of all treatment levels\. "
r"Expected keys: \{0, 1, 2, 3\}\. "
@@ -167,7 +158,7 @@ def test_causal_contrast_exceptions():
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1])
dml_obj.causal_contrast(reference_levels=0)
- msg = 'Only one treatment level. No causal contrast can be computed.'
+ msg = "Only one treatment level. No causal contrast can be computed."
with pytest.raises(ValueError, match=msg):
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0])
dml_obj.fit()
@@ -175,8 +166,10 @@ def test_causal_contrast_exceptions():
dml_obj = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=[0, 1])
dml_obj.fit()
- msg = ('Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or '
- 'a single treatment level.')
+ msg = (
+ "Invalid reference_levels. reference_levels has to be an iterable subset of treatment_levels or "
+ "a single treatment level."
+ )
with pytest.raises(ValueError, match=msg):
dml_obj.causal_contrast(reference_levels=2)
with pytest.raises(ValueError, match=msg):
diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py
index b6a2c8eed..a6e2c9120 100644
--- a/doubleml/irm/tests/test_apos_external_predictions.py
+++ b/doubleml/irm/tests/test_apos_external_predictions.py
@@ -1,12 +1,13 @@
-import pytest
-import numpy as np
-import pandas as pd
import math
+import numpy as np
+import pandas as pd
+import pytest
from sklearn.linear_model import LinearRegression, LogisticRegression
+
from doubleml import DoubleMLAPOS, DoubleMLData
from doubleml.datasets import make_irm_data_discrete_treatments
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
from ...tests._utils import draw_smpls
@@ -34,20 +35,18 @@ def set_ml_g_ext(request):
@pytest.fixture(scope="module")
def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ext):
score = "APO"
- ext_predictions = {
- treatment_level: {} for treatment_level in treatment_levels
- }
+ ext_predictions = {treatment_level: {} for treatment_level in treatment_levels}
np.random.seed(3141)
n_obs = 500
data_apo = make_irm_data_discrete_treatments(n_obs=n_obs)
df_apo = pd.DataFrame(
- np.column_stack((data_apo['y'], data_apo['d'], data_apo['x'])),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data_apo['x'].shape[1])]
+ np.column_stack((data_apo["y"], data_apo["d"], data_apo["x"])),
+ columns=["y", "d"] + ["x" + str(i) for i in range(data_apo["x"].shape[1])],
)
- dml_data = DoubleMLData(df_apo, 'y', 'd')
- d = data_apo['d']
+ dml_data = DoubleMLData(df_apo, "y", "d")
+ d = data_apo["d"]
all_smpls = draw_smpls(n_obs, n_folds=5, n_rep=n_rep, groups=d)
kwargs = {
@@ -55,7 +54,7 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex
"score": score,
"treatment_levels": treatment_levels,
"n_rep": n_rep,
- "draw_sample_splitting": False
+ "draw_sample_splitting": False,
}
dml_obj = DoubleMLAPOS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
@@ -90,7 +89,7 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex
"coef_ext": dml_obj_ext.coef[0],
"dml_obj": dml_obj,
"dml_obj_ext": dml_obj_ext,
- "treatment_levels": treatment_levels
+ "treatment_levels": treatment_levels,
}
return res_dict
@@ -99,10 +98,7 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex
@pytest.mark.ci
def test_doubleml_apos_ext_coef(doubleml_apos_ext_fixture):
assert math.isclose(
- doubleml_apos_ext_fixture["coef_normal"],
- doubleml_apos_ext_fixture["coef_ext"],
- rel_tol=1e-9,
- abs_tol=1e-4
+ doubleml_apos_ext_fixture["coef_normal"], doubleml_apos_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4
)
@@ -114,5 +110,5 @@ def test_doubleml_apos_ext_pred_nuisance(doubleml_apos_ext_fixture):
doubleml_apos_ext_fixture["dml_obj"].modellist[i_level].nuisance_loss[nuisance_key],
doubleml_apos_ext_fixture["dml_obj_ext"].modellist[i_level].nuisance_loss[nuisance_key],
rtol=1e-9,
- atol=1e-4
+ atol=1e-4,
)
diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py
index 3ab8db6af..ea612decf 100644
--- a/doubleml/irm/tests/test_apos_weighted_scores.py
+++ b/doubleml/irm/tests/test_apos_weighted_scores.py
@@ -1,83 +1,78 @@
-import pytest
import numpy as np
import pandas as pd
-
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
from doubleml.datasets import make_irm_data_discrete_treatments
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['APO'])
+@pytest.fixture(scope="module", params=["APO"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[[0, 1, 2], [0]])
+@pytest.fixture(scope="module", params=[[0, 1, 2], [0]])
def treatment_levels(request):
return request.param
-@pytest.fixture(scope='module')
-def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold,
- treatment_levels):
+@pytest.fixture(scope="module")
+def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
n_obs = 500
n_folds = 2
# collect data
data = make_irm_data_discrete_treatments(n_obs=n_obs)
- y = data['y']
- x = data['x']
- d = data['d']
- df = pd.DataFrame(
- np.column_stack((y, d, x)),
- columns=['y', 'd'] + ['x' + str(i) for i in range(data['x'].shape[1])]
- )
+ y = data["y"]
+ x = data["x"]
+ d = data["d"]
+ df = pd.DataFrame(np.column_stack((y, d, x)), columns=["y", "d"] + ["x" + str(i) for i in range(data["x"].shape[1])])
- obj_dml_data = dml.DoubleMLData(df, 'y', 'd')
+ obj_dml_data = dml.DoubleMLData(df, "y", "d")
input_args = {
- 'obj_dml_data': obj_dml_data,
- 'ml_g': clone(learner[0]),
- 'ml_m': clone(learner[1]),
- 'treatment_levels': treatment_levels,
- 'n_folds': n_folds,
- 'n_rep': n_rep,
- 'score': score,
- 'normalize_ipw': normalize_ipw,
- 'trimming_threshold': trimming_threshold,
- 'trimming_rule': 'truncate'
+ "obj_dml_data": obj_dml_data,
+ "ml_g": clone(learner[0]),
+ "ml_m": clone(learner[1]),
+ "treatment_levels": treatment_levels,
+ "n_folds": n_folds,
+ "n_rep": n_rep,
+ "score": score,
+ "normalize_ipw": normalize_ipw,
+ "trimming_threshold": trimming_threshold,
+ "trimming_rule": "truncate",
}
np.random.seed(42)
@@ -86,43 +81,38 @@ def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_t
np.random.seed(42)
weights = 0.5 * np.ones_like(obj_dml_data.y)
- dml_obj_weighted = dml.DoubleMLAPOS(draw_sample_splitting=False,
- weights=weights,
- **input_args)
+ dml_obj_weighted = dml.DoubleMLAPOS(draw_sample_splitting=False, weights=weights, **input_args)
dml_obj_weighted.set_sample_splitting(all_smpls=dml_obj.smpls)
dml_obj_weighted.fit()
np.random.seed(42)
weights_dict = {
- 'weights': weights,
- 'weights_bar': np.tile(weights[:, np.newaxis], (1, n_rep)),
+ "weights": weights,
+ "weights_bar": np.tile(weights[:, np.newaxis], (1, n_rep)),
}
- dml_obj_weighted_dict = dml.DoubleMLAPOS(draw_sample_splitting=False,
- weights=weights_dict,
- **input_args)
+ dml_obj_weighted_dict = dml.DoubleMLAPOS(draw_sample_splitting=False, weights=weights_dict, **input_args)
dml_obj_weighted_dict.set_sample_splitting(all_smpls=dml_obj.smpls)
dml_obj_weighted_dict.fit()
result_dict = {
- 'coef': dml_obj.coef,
- 'weighted_coef': dml_obj_weighted.coef,
- 'weighted_coef_dict': dml_obj_weighted_dict.coef,
- 'default_weights': dml_obj.weights,
+ "coef": dml_obj.coef,
+ "weighted_coef": dml_obj_weighted.coef,
+ "weighted_coef_dict": dml_obj_weighted_dict.coef,
+ "default_weights": dml_obj.weights,
}
return result_dict
@pytest.mark.ci
def test_apos_weighted_coef(weighted_apos_score_fixture):
- assert np.allclose(0.5 * weighted_apos_score_fixture['coef'],
- weighted_apos_score_fixture['weighted_coef'])
- assert np.allclose(0.5 * weighted_apos_score_fixture['coef'],
- weighted_apos_score_fixture['weighted_coef_dict'])
+ assert np.allclose(0.5 * weighted_apos_score_fixture["coef"], weighted_apos_score_fixture["weighted_coef"])
+ assert np.allclose(0.5 * weighted_apos_score_fixture["coef"], weighted_apos_score_fixture["weighted_coef_dict"])
@pytest.mark.ci
def test_apos_default_weights(weighted_apos_score_fixture):
- assert isinstance(weighted_apos_score_fixture['default_weights'], np.ndarray)
+ assert isinstance(weighted_apos_score_fixture["default_weights"], np.ndarray)
- assert np.allclose(weighted_apos_score_fixture['default_weights'],
- np.ones_like(weighted_apos_score_fixture['default_weights']))
+ assert np.allclose(
+ weighted_apos_score_fixture["default_weights"], np.ones_like(weighted_apos_score_fixture["default_weights"])
+ )
diff --git a/doubleml/irm/tests/test_cvar.py b/doubleml/irm/tests/test_cvar.py
index 363f8b01b..0eee71c60 100644
--- a/doubleml/irm/tests/test_cvar.py
+++ b/doubleml/irm/tests/test_cvar.py
@@ -1,53 +1,53 @@
-import numpy as np
-import pytest
import math
-import doubleml as dml
-
+import numpy as np
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
from ...tests._utils import draw_smpls
from ._utils_cvar_manual import fit_cvar
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.25, 0.5, 0.75])
+@pytest.fixture(scope="module", params=[0.25, 0.5, 0.75])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
@pytest.fixture(scope="module")
-def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner,
- normalize_ipw, trimming_threshold):
+def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold):
n_folds = 3
# Set machine learning methods for m & g
@@ -62,44 +62,53 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner,
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
np.random.seed(42)
- dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data,
- clone(ml_g), clone(ml_m),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ dml_cvar_obj = dml.DoubleMLCVAR(
+ obj_dml_data,
+ clone(ml_g),
+ clone(ml_m),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_cvar_obj.set_sample_splitting(all_smpls=all_smpls)
dml_cvar_obj.fit()
np.random.seed(42)
- res_manual = fit_cvar(y, x, d, quantile,
- clone(ml_g), clone(ml_m),
- all_smpls, treatment,
- normalize_ipw=normalize_ipw,
- n_rep=1, trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_cvar_obj.coef.item(),
- 'coef_manual': res_manual['pq'],
- 'se': dml_cvar_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_cvar(
+ y,
+ x,
+ d,
+ quantile,
+ clone(ml_g),
+ clone(ml_m),
+ all_smpls,
+ treatment,
+ normalize_ipw=normalize_ipw,
+ n_rep=1,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_cvar_obj.coef.item(),
+ "coef_manual": res_manual["pq"],
+ "se": dml_cvar_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_cvar_coef(dml_cvar_fixture):
- assert math.isclose(dml_cvar_fixture['coef'],
- dml_cvar_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_cvar_fixture["coef"], dml_cvar_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_cvar_se(dml_cvar_fixture):
- assert math.isclose(dml_cvar_fixture['se'],
- dml_cvar_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_cvar_fixture["se"], dml_cvar_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_cvar_tune.py b/doubleml/irm/tests/test_cvar_tune.py
index 69f6ad2d0..ade847691 100644
--- a/doubleml/irm/tests/test_cvar_tune.py
+++ b/doubleml/irm/tests/test_cvar_tune.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
@@ -11,53 +11,45 @@
from ._utils_cvar_manual import fit_cvar, tune_nuisance_cvar
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.25, 0.5, 0.75])
+@pytest.fixture(scope="module", params=[0.25, 0.5, 0.75])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=5, random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(max_depth=5, random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=5, random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestClassifier(max_depth=5, random_state=42)])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]:
- par_grid = {'n_estimators': [5, 10, 15, 20]}
+ par_grid = {"n_estimators": [5, 10, 15, 20]}
return par_grid
-@pytest.fixture(scope='module')
-def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, learner_m,
- normalize_ipw, tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+@pytest.fixture(scope="module")
+def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, learner_m, normalize_ipw, tune_on_folds):
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
n_folds = 2
@@ -70,22 +62,24 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le
smpls = all_smpls[0]
np.random.seed(42)
- dml_cvar_obj = dml.DoubleMLCVAR(obj_dml_data,
- clone(learner_g), clone(learner_m),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- trimming_threshold=0.01,
- draw_sample_splitting=False)
+ dml_cvar_obj = dml.DoubleMLCVAR(
+ obj_dml_data,
+ clone(learner_g),
+ clone(learner_m),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=0.01,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_cvar_obj.set_sample_splitting(all_smpls=all_smpls)
# tune hyperparameters
np.random.seed(42)
- tune_res = dml_cvar_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_cvar_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLCVAR)
np.random.seed(42)
@@ -93,47 +87,70 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le
np.random.seed(42)
if tune_on_folds:
- g_params, m_params = tune_nuisance_cvar(y, x, d,
- clone(learner_g), clone(learner_m),
- smpls, treatment, quantile,
- n_folds_tune, par_grid['ml_g'], par_grid['ml_m'])
+ g_params, m_params = tune_nuisance_cvar(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ smpls,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g_params, m_params = tune_nuisance_cvar(y, x, d,
- clone(learner_g), clone(learner_m),
- xx, treatment, quantile,
- n_folds_tune, par_grid['ml_g'], par_grid['ml_m'])
+ g_params, m_params = tune_nuisance_cvar(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ xx,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
g_params = g_params * n_folds
m_params = m_params * n_folds
np.random.seed(42)
- res_manual = fit_cvar(y, x, d, quantile,
- learner_g=clone(learner_g),
- learner_m=clone(learner_m),
- all_smpls=all_smpls,
- treatment=treatment,
- n_rep=1, trimming_threshold=0.01,
- normalize_ipw=normalize_ipw,
- g_params=g_params, m_params=m_params)
-
- res_dict = {'coef': dml_cvar_obj.coef.item(),
- 'coef_manual': res_manual['pq'],
- 'se': dml_cvar_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_cvar(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g=clone(learner_g),
+ learner_m=clone(learner_m),
+ all_smpls=all_smpls,
+ treatment=treatment,
+ n_rep=1,
+ trimming_threshold=0.01,
+ normalize_ipw=normalize_ipw,
+ g_params=g_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_cvar_obj.coef.item(),
+ "coef_manual": res_manual["pq"],
+ "se": dml_cvar_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_cvar_coef(dml_cvar_fixture):
- assert math.isclose(dml_cvar_fixture['coef'],
- dml_cvar_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_cvar_fixture["coef"], dml_cvar_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_cvar_se(dml_cvar_fixture):
- assert math.isclose(dml_cvar_fixture['se'],
- dml_cvar_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_cvar_fixture["se"], dml_cvar_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_iivm.py b/doubleml/irm/tests/test_iivm.py
index 153efe5d1..169f4175d 100644
--- a/doubleml/irm/tests/test_iivm.py
+++ b/doubleml/irm/tests/test_iivm.py
@@ -1,58 +1,56 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_iivm_manual import fit_iivm, boot_iivm
+from ._utils_iivm_manual import boot_iivm, fit_iivm
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=2, n_estimators=10),
- RandomForestClassifier(max_depth=2, n_estimators=10)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [RandomForestRegressor(max_depth=2, n_estimators=10), RandomForestClassifier(max_depth=2, n_estimators=10)],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['LATE'])
+@pytest.fixture(scope="module", params=["LATE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
@pytest.fixture(scope="module")
def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 491
# collect data
data = generate_data_iivm
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
- y = data['y'].values
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
- z = data['z'].values
+ d = data["d"].values
+ z = data["z"].values
# Set machine learning methods for m & g
ml_g = clone(learner[0])
@@ -65,64 +63,89 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
- dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
- ml_g, ml_m, ml_r,
- n_folds,
- draw_sample_splitting=False,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, "z")
+ dml_iivm_obj = dml.DoubleMLIIVM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ ml_r,
+ n_folds,
+ draw_sample_splitting=False,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls)
dml_iivm_obj.fit()
np.random.seed(3141)
- res_manual = fit_iivm(y, x, d, z,
- clone(learner[0]), clone(learner[1]), clone(learner[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_iivm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_iivm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner[0]),
+ clone(learner[1]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_iivm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_iivm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_iivm(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_r_hat0'], res_manual['all_r_hat1'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_iivm(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat0"],
+ res_manual["all_r_hat1"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_iivm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_iivm_coef(dml_iivm_fixture):
- assert math.isclose(dml_iivm_fixture['coef'],
- dml_iivm_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_iivm_fixture["coef"], dml_iivm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_iivm_se(dml_iivm_fixture):
- assert math.isclose(dml_iivm_fixture['se'],
- dml_iivm_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_iivm_fixture["se"], dml_iivm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_iivm_boot(dml_iivm_fixture):
- for bootstrap in dml_iivm_fixture['boot_methods']:
- assert np.allclose(dml_iivm_fixture['boot_t_stat' + bootstrap],
- dml_iivm_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_iivm_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_iivm_fixture["boot_t_stat" + bootstrap],
+ dml_iivm_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/irm/tests/test_iivm_classifier.py b/doubleml/irm/tests/test_iivm_classifier.py
index 1d7a72652..983c34a77 100644
--- a/doubleml/irm/tests/test_iivm_classifier.py
+++ b/doubleml/irm/tests/test_iivm_classifier.py
@@ -1,48 +1,46 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_iivm_manual import fit_iivm, boot_iivm
+from ._utils_iivm_manual import boot_iivm, fit_iivm
-@pytest.fixture(scope='module',
- params=[[LogisticRegression(solver='lbfgs', max_iter=250),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestClassifier(max_depth=2, n_estimators=10),
- RandomForestClassifier(max_depth=2, n_estimators=10)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LogisticRegression(solver="lbfgs", max_iter=250), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [RandomForestClassifier(max_depth=2, n_estimators=10), RandomForestClassifier(max_depth=2, n_estimators=10)],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['LATE'])
+@pytest.fixture(scope="module", params=["LATE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
@pytest.fixture(scope="module")
def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 491
@@ -50,7 +48,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma
(x, y, d, z) = generate_data_iivm_binary
n_obs = len(y)
- all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*z)
+ all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d + 2 * z)
# Set machine learning methods for m & g
ml_g = clone(learner[0])
ml_m = clone(learner[1])
@@ -58,63 +56,92 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z)
- dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
- ml_g, ml_m, ml_r,
- n_folds,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ dml_iivm_obj = dml.DoubleMLIIVM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ ml_r,
+ n_folds,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls)
np.random.seed(3141)
dml_iivm_obj.fit()
np.random.seed(3141)
- res_manual = fit_iivm(y, x, d, z,
- clone(learner[0]), clone(learner[1]), clone(learner[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_iivm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_iivm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner[0]),
+ clone(learner[1]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_iivm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_iivm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_iivm(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_r_hat0'], res_manual['all_r_hat1'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_iivm(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat0"],
+ res_manual["all_r_hat1"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_iivm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_iivm_coef(dml_iivm_classifier_fixture):
- assert math.isclose(dml_iivm_classifier_fixture['coef'],
- dml_iivm_classifier_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_iivm_classifier_fixture["coef"], dml_iivm_classifier_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_iivm_se(dml_iivm_classifier_fixture):
- assert math.isclose(dml_iivm_classifier_fixture['se'],
- dml_iivm_classifier_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_iivm_classifier_fixture["se"], dml_iivm_classifier_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_iivm_boot(dml_iivm_classifier_fixture):
- for bootstrap in dml_iivm_classifier_fixture['boot_methods']:
- assert np.allclose(dml_iivm_classifier_fixture['boot_t_stat' + bootstrap],
- dml_iivm_classifier_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_iivm_classifier_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_iivm_classifier_fixture["boot_t_stat" + bootstrap],
+ dml_iivm_classifier_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/irm/tests/test_iivm_external_predictions.py b/doubleml/irm/tests/test_iivm_external_predictions.py
index 8db50b859..7f4626e95 100644
--- a/doubleml/irm/tests/test_iivm_external_predictions.py
+++ b/doubleml/irm/tests/test_iivm_external_predictions.py
@@ -1,10 +1,12 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression, LogisticRegression
-from doubleml import DoubleMLIIVM, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLIIVM
from doubleml.datasets import make_iivm_data
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
@pytest.fixture(scope="module", params=[1, 3])
@@ -16,9 +18,7 @@ def n_rep(request):
def adapted_doubleml_fixture(n_rep):
ext_predictions = {"d": {}}
- data = make_iivm_data(
- n_obs=500, dim_x=20, theta=0.5, alpha_x=1.0, return_type="DataFrame"
- )
+ data = make_iivm_data(n_obs=500, dim_x=20, theta=0.5, alpha_x=1.0, return_type="DataFrame")
np.random.seed(3141)
@@ -46,9 +46,7 @@ def adapted_doubleml_fixture(n_rep):
ext_predictions["d"]["ml_r0"] = dml_iivm.predictions["ml_r0"][:, :, 0]
ext_predictions["d"]["ml_r1"] = dml_iivm.predictions["ml_r1"][:, :, 0]
- dml_iivm_ext = DoubleMLIIVM(
- ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), ml_r=DMLDummyClassifier(), **kwargs
- )
+ dml_iivm_ext = DoubleMLIIVM(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), ml_r=DMLDummyClassifier(), **kwargs)
np.random.seed(3141)
dml_iivm_ext.fit(external_predictions=ext_predictions)
diff --git a/doubleml/irm/tests/test_iivm_subgroups.py b/doubleml/irm/tests/test_iivm_subgroups.py
index 84bed1938..906ed8975 100644
--- a/doubleml/irm/tests/test_iivm_subgroups.py
+++ b/doubleml/irm/tests/test_iivm_subgroups.py
@@ -1,63 +1,63 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_iivm_manual import fit_iivm, boot_iivm
+from ._utils_iivm_manual import boot_iivm, fit_iivm
-@pytest.fixture(scope='module',
- params=[[RandomForestRegressor(max_depth=2, n_estimators=10),
- RandomForestClassifier(max_depth=2, n_estimators=10)]])
+@pytest.fixture(
+ scope="module",
+ params=[[RandomForestRegressor(max_depth=2, n_estimators=10), RandomForestClassifier(max_depth=2, n_estimators=10)]],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['LATE'])
+@pytest.fixture(scope="module", params=["LATE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01])
+@pytest.fixture(scope="module", params=[0.01])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=[{'always_takers': True, 'never_takers': True},
- {'always_takers': False, 'never_takers': True},
- {'always_takers': True, 'never_takers': False}])
+@pytest.fixture(
+ scope="module",
+ params=[
+ {"always_takers": True, "never_takers": True},
+ {"always_takers": False, "never_takers": True},
+ {"always_takers": True, "never_takers": False},
+ ],
+)
def subgroups(request):
return request.param
@pytest.fixture(scope="module")
-def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw,
- trimming_threshold, subgroups):
- boot_methods = ['normal']
+def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold, subgroups):
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 491
# collect data
data = generate_data_iivm
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
- n_obs = len(data['y'])
- strata = data['d'] + 2 * data['z']
+ n_obs = len(data["y"])
+ strata = data["d"] + 2 * data["z"]
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata)
# Set machine learning methods for m & g
@@ -66,83 +66,111 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw
ml_r = clone(learner[1])
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
- dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
- ml_g, ml_m, ml_r,
- n_folds,
- subgroups=subgroups,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, "z")
+ dml_iivm_obj = dml.DoubleMLIIVM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ ml_r,
+ n_folds,
+ subgroups=subgroups,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls)
dml_iivm_obj.fit(store_predictions=True)
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
- z = data['z'].values
-
- res_manual = fit_iivm(y, x, d, z,
- clone(learner[0]), clone(learner[1]), clone(learner[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold,
- always_takers=subgroups['always_takers'], never_takers=subgroups['never_takers'])
-
- res_dict = {'coef': dml_iivm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_iivm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods,
- 'always_takers': subgroups['always_takers'],
- 'never_takers': subgroups['never_takers'],
- 'rhat0': dml_iivm_obj.predictions['ml_r0'],
- 'rhat1': dml_iivm_obj.predictions['ml_r1'],
- 'z': z
- }
+ d = data["d"].values
+ z = data["z"].values
+
+ res_manual = fit_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner[0]),
+ clone(learner[1]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ always_takers=subgroups["always_takers"],
+ never_takers=subgroups["never_takers"],
+ )
+
+ res_dict = {
+ "coef": dml_iivm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_iivm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ "always_takers": subgroups["always_takers"],
+ "never_takers": subgroups["never_takers"],
+ "rhat0": dml_iivm_obj.predictions["ml_r0"],
+ "rhat1": dml_iivm_obj.predictions["ml_r1"],
+ "z": z,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_iivm(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_r_hat0'], res_manual['all_r_hat1'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_iivm(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat0"],
+ res_manual["all_r_hat1"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_iivm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_iivm_subgroups_coef(dml_iivm_subgroups_fixture):
- assert math.isclose(dml_iivm_subgroups_fixture['coef'],
- dml_iivm_subgroups_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_iivm_subgroups_fixture["coef"], dml_iivm_subgroups_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_iivm_subgroups_se(dml_iivm_subgroups_fixture):
- assert math.isclose(dml_iivm_subgroups_fixture['se'],
- dml_iivm_subgroups_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_iivm_subgroups_fixture["se"], dml_iivm_subgroups_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_iivm_subgroups_boot(dml_iivm_subgroups_fixture):
- for bootstrap in dml_iivm_subgroups_fixture['boot_methods']:
- assert np.allclose(dml_iivm_subgroups_fixture['boot_t_stat' + bootstrap],
- dml_iivm_subgroups_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_iivm_subgroups_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_iivm_subgroups_fixture["boot_t_stat" + bootstrap],
+ dml_iivm_subgroups_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_iivm_subgroups(dml_iivm_subgroups_fixture):
- if not dml_iivm_subgroups_fixture['always_takers']:
- assert np.all(dml_iivm_subgroups_fixture['rhat0'] == 0)
- if not dml_iivm_subgroups_fixture['never_takers']:
- assert np.all(dml_iivm_subgroups_fixture['rhat1'] == 1)
+ if not dml_iivm_subgroups_fixture["always_takers"]:
+ assert np.all(dml_iivm_subgroups_fixture["rhat0"] == 0)
+ if not dml_iivm_subgroups_fixture["never_takers"]:
+ assert np.all(dml_iivm_subgroups_fixture["rhat1"] == 1)
diff --git a/doubleml/irm/tests/test_iivm_tune.py b/doubleml/irm/tests/test_iivm_tune.py
index 6ce4ef0f9..aadbc31fd 100644
--- a/doubleml/irm/tests/test_iivm_tune.py
+++ b/doubleml/irm/tests/test_iivm_tune.py
@@ -1,88 +1,78 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_iivm_manual import fit_iivm, boot_iivm, tune_nuisance_iivm
+from ._utils_iivm_manual import boot_iivm, fit_iivm, tune_nuisance_iivm
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor()])
+@pytest.fixture(scope="module", params=[RandomForestRegressor()])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier()])
+@pytest.fixture(scope="module", params=[RandomForestClassifier()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression()])
+@pytest.fixture(scope="module", params=[LogisticRegression()])
def learner_r(request):
return request.param
-@pytest.fixture(scope='module',
- params=['LATE'])
+@pytest.fixture(scope="module", params=["LATE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[{'always_takers': True, 'never_takers': True},
- {'always_takers': False, 'never_takers': False}])
+@pytest.fixture(
+ scope="module", params=[{"always_takers": True, "never_takers": True}, {"always_takers": False, "never_takers": False}]
+)
def subgroups(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor, RandomForestClassifier]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-4, 2, 10)}
+ par_grid = {"C": np.logspace(-4, 2, 10)}
return par_grid
@pytest.fixture(scope="module")
-def dml_iivm_fixture(generate_data_iivm, learner_g, learner_m, learner_r, score, normalize_ipw, subgroups,
- tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m),
- 'ml_r': get_par_grid(learner_r)}
+def dml_iivm_fixture(generate_data_iivm, learner_g, learner_m, learner_r, score, normalize_ipw, subgroups, tune_on_folds):
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m), "ml_r": get_par_grid(learner_r)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 491
# collect data
data = generate_data_iivm
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
- n_obs = len(data['y'])
- strata = data['d'] + 2 * data['z']
+ n_obs = len(data["y"])
+ strata = data["d"] + 2 * data["z"]
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata)
# Set machine learning methods for m, g & r
@@ -91,107 +81,148 @@ def dml_iivm_fixture(generate_data_iivm, learner_g, learner_m, learner_r, score,
ml_r = clone(learner_r)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
- dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
- ml_g, ml_m, ml_r,
- n_folds,
- subgroups=subgroups,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, "z")
+ dml_iivm_obj = dml.DoubleMLIIVM(
+ obj_dml_data, ml_g, ml_m, ml_r, n_folds, subgroups=subgroups, normalize_ipw=normalize_ipw, draw_sample_splitting=False
+ )
# synchronize the sample splitting
dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls)
# tune hyperparameters
- tune_res = dml_iivm_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=True)
+ tune_res = dml_iivm_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=True)
assert isinstance(tune_res, list)
dml_iivm_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
- z = data['z'].values
+ d = data["d"].values
+ z = data["z"].values
smpls = all_smpls[0]
if tune_on_folds:
- g0_params, g1_params, m_params, r0_params, r1_params = \
- tune_nuisance_iivm(y, x, d, z,
- clone(learner_g), clone(learner_m), clone(learner_r), smpls,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'],
- always_takers=subgroups['always_takers'], never_takers=subgroups['never_takers'])
+ g0_params, g1_params, m_params, r0_params, r1_params = tune_nuisance_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_r),
+ smpls,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ always_takers=subgroups["always_takers"],
+ never_takers=subgroups["never_takers"],
+ )
else:
xx = [(np.arange(data.shape[0]), np.array([]))]
- g0_params, g1_params, m_params, r0_params, r1_params = \
- tune_nuisance_iivm(y, x, d, z,
- clone(learner_g), clone(learner_m), clone(learner_r), xx,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'],
- always_takers=subgroups['always_takers'], never_takers=subgroups['never_takers'])
+ g0_params, g1_params, m_params, r0_params, r1_params = tune_nuisance_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_r),
+ xx,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ always_takers=subgroups["always_takers"],
+ never_takers=subgroups["never_takers"],
+ )
g0_params = g0_params * n_folds
g1_params = g1_params * n_folds
m_params = m_params * n_folds
- if subgroups['always_takers']:
+ if subgroups["always_takers"]:
r0_params = r0_params * n_folds
else:
r0_params = r0_params
- if subgroups['never_takers']:
+ if subgroups["never_takers"]:
r1_params = r1_params * n_folds
else:
r1_params = r1_params
- res_manual = fit_iivm(y, x, d, z,
- clone(learner_g), clone(learner_m), clone(learner_r),
- all_smpls, score,
- g0_params=g0_params, g1_params=g1_params,
- m_params=m_params, r0_params=r0_params, r1_params=r1_params,
- normalize_ipw=normalize_ipw,
- always_takers=subgroups['always_takers'], never_takers=subgroups['never_takers'])
-
- res_dict = {'coef': dml_iivm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_iivm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_iivm(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_r),
+ all_smpls,
+ score,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ r0_params=r0_params,
+ r1_params=r1_params,
+ normalize_ipw=normalize_ipw,
+ always_takers=subgroups["always_takers"],
+ never_takers=subgroups["never_takers"],
+ )
+
+ res_dict = {
+ "coef": dml_iivm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_iivm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_iivm(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_r_hat0'], res_manual['all_r_hat1'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_iivm(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat0"],
+ res_manual["all_r_hat1"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_iivm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
@pytest.mark.filterwarnings(
- r'ignore:Propensity predictions from learner RandomForestClassifier\(\) for ml_m are close to zero or one '
- r'\(eps=1e-12\).:UserWarning'
+ r"ignore:Propensity predictions from learner RandomForestClassifier\(\) for ml_m are close to zero or one "
+ r"\(eps=1e-12\).:UserWarning"
)
def test_dml_iivm_coef(dml_iivm_fixture):
- assert math.isclose(dml_iivm_fixture['coef'],
- dml_iivm_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_iivm_fixture["coef"], dml_iivm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_iivm_se(dml_iivm_fixture):
- assert math.isclose(dml_iivm_fixture['se'],
- dml_iivm_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_iivm_fixture["se"], dml_iivm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_iivm_boot(dml_iivm_fixture):
- for bootstrap in dml_iivm_fixture['boot_methods']:
- assert np.allclose(dml_iivm_fixture['boot_t_stat' + bootstrap],
- dml_iivm_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_iivm_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_iivm_fixture["boot_t_stat" + bootstrap],
+ dml_iivm_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
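
A recurring idiom in these files is comparing the packaged multiplier bootstrap against a manual reimplementation: the global NumPy RNG is reseeded with the same value before each run so both draw identical multipliers, and the manual statistics are reshaped to the (n_rep_boot, n_coefs, n_rep) layout of `boot_t_stat`. A compact sketch of the pattern; `manual_boot_fn` is a placeholder for helpers like `boot_iivm`:

    import numpy as np

    def bootstrap_matches(dml_obj, manual_boot_fn, method="normal", n_rep_boot=499, seed=3141):
        np.random.seed(seed)  # align multiplier draws for the manual run
        manual = manual_boot_fn(method, n_rep_boot).reshape(-1, 1, 1)
        np.random.seed(seed)  # ... and for the packaged run
        dml_obj.bootstrap(method=method, n_rep_boot=n_rep_boot)
        return np.allclose(dml_obj.boot_t_stat, manual, rtol=1e-9, atol=1e-4)
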
diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py
index 17fd7e6a4..0a9246264 100644
--- a/doubleml/irm/tests/test_irm.py
+++ b/doubleml/irm/tests/test_irm.py
@@ -1,51 +1,52 @@
+import math
+
import numpy as np
import pandas as pd
import pytest
-import math
-
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
from doubleml.datasets import make_irm_data
from doubleml.utils.resampling import DoubleMLResampling
from ...tests._utils import draw_smpls
-from ._utils_irm_manual import fit_irm, boot_irm, fit_sensitivity_elements_irm
-
-
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+from ._utils_irm_manual import boot_irm, fit_irm, fit_sensitivity_elements_irm
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['ATE', 'ATTE'])
+@pytest.fixture(scope="module", params=["ATE", "ATTE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -62,133 +63,155 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
np.random.seed(3141)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_irm_obj = dml.DoubleMLIRM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_irm_obj.set_sample_splitting(all_smpls=all_smpls)
dml_irm_obj.fit()
np.random.seed(3141)
- res_manual = fit_irm(y, x, d,
- clone(learner[0]), clone(learner[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ res_manual = fit_irm(
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
np.random.seed(3141)
# test with external nuisance predictions
- dml_irm_obj_ext = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False,
- trimming_threshold=trimming_threshold)
+ dml_irm_obj_ext = dml.DoubleMLIRM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ trimming_threshold=trimming_threshold,
+ )
# synchronize the sample splitting
dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls)
- prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1, 1),
- 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1, 1),
- 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1, 1)}}
+ prediction_dict = {
+ "d": {
+ "ml_g0": dml_irm_obj.predictions["ml_g0"].reshape(-1, 1),
+ "ml_g1": dml_irm_obj.predictions["ml_g1"].reshape(-1, 1),
+ "ml_m": dml_irm_obj.predictions["ml_m"].reshape(-1, 1),
+ }
+ }
dml_irm_obj_ext.fit(external_predictions=prediction_dict)
- res_dict = {'coef': dml_irm_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'coef_ext': dml_irm_obj_ext.coef,
- 'se': dml_irm_obj.se,
- 'se_manual': res_manual['se'],
- 'se_ext': dml_irm_obj_ext.se,
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_irm_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "coef_ext": dml_irm_obj_ext.coef,
+ "se": dml_irm_obj.se,
+ "se_manual": res_manual["se"],
+ "se_ext": dml_irm_obj_ext.se,
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_irm(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_p_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_irm(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_p_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
np.random.seed(3141)
dml_irm_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
- res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap] = dml_irm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap + "_ext"] = dml_irm_obj_ext.boot_t_stat
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_irm_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_irm(y, d,
- all_coef=dml_irm_obj.all_coef,
- predictions=dml_irm_obj.predictions,
- score=score,
- n_rep=1)
+ res_dict["sensitivity_elements"] = dml_irm_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_irm(
+ y, d, all_coef=dml_irm_obj.all_coef, predictions=dml_irm_obj.predictions, score=score, n_rep=1
+ )
    # check that sensitivity analysis with rho=0 reproduces the asymptotic standard deviation
dml_irm_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_irm_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_irm_obj.sensitivity_params["se"]
return res_dict
@pytest.mark.ci
def test_dml_irm_coef(dml_irm_fixture):
- assert math.isclose(dml_irm_fixture['coef'][0],
- dml_irm_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_irm_fixture['coef'][0],
- dml_irm_fixture['coef_ext'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["coef"][0], dml_irm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["coef"][0], dml_irm_fixture["coef_ext"][0], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_se(dml_irm_fixture):
- assert math.isclose(dml_irm_fixture['se'][0],
- dml_irm_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_irm_fixture['se'][0],
- dml_irm_fixture['se_ext'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["se"][0], dml_irm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["se"][0], dml_irm_fixture["se_ext"][0], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_boot(dml_irm_fixture):
- for bootstrap in dml_irm_fixture['boot_methods']:
- assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap],
- dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap],
- dml_irm_fixture['boot_t_stat' + bootstrap + '_ext'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_irm_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_irm_fixture["boot_t_stat" + bootstrap],
+ dml_irm_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
+ assert np.allclose(
+ dml_irm_fixture["boot_t_stat" + bootstrap],
+ dml_irm_fixture["boot_t_stat" + bootstrap + "_ext"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_irm_sensitivity(dml_irm_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_irm_fixture['sensitivity_elements'][sensitivity_element],
- dml_irm_fixture['sensitivity_elements_manual'][sensitivity_element],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_irm_fixture["sensitivity_elements"][sensitivity_element],
+ dml_irm_fixture["sensitivity_elements_manual"][sensitivity_element],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_irm_sensitivity_rho0(dml_irm_fixture):
- assert np.allclose(dml_irm_fixture['se'],
- dml_irm_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm_fixture['se'],
- dml_irm_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_irm_fixture["se"], dml_irm_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_irm_fixture["se"], dml_irm_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4)
-@pytest.fixture(scope='module',
- params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
+@pytest.fixture(scope="module", params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param
@@ -207,11 +230,7 @@ def test_dml_irm_cate_gate(cov_type):
ml_g = RandomForestRegressor(n_estimators=10)
ml_m = RandomForestClassifier(n_estimators=10)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_m=ml_m,
- ml_g=ml_g,
- trimming_threshold=0.05,
- n_folds=5)
+ dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, trimming_threshold=0.05, n_folds=5)
dml_irm_obj.fit()
# create a random basis
@@ -221,10 +240,10 @@ def test_dml_irm_cate_gate(cov_type):
assert isinstance(cate.confint(), pd.DataFrame)
assert cate.blp_model.cov_type == cov_type
- groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= 0,
- obj_dml_data.data['X1'] > 0.2]),
- columns=['Group 1', 'Group 2'])
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ groups_1 = pd.DataFrame(
+ np.column_stack([obj_dml_data.data["X1"] <= 0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
+ )
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gate_1 = dml_irm_obj.gate(groups_1, cov_type=cov_type)
assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
@@ -234,7 +253,7 @@ def test_dml_irm_cate_gate(cov_type):
np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gate_2 = dml_irm_obj.gate(groups_2, cov_type=cov_type)
assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
@@ -243,63 +262,40 @@ def test_dml_irm_cate_gate(cov_type):
assert gate_2.blp_model.cov_type == cov_type
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_irm_weights_fixture(n_rep):
n = 10000
# collect data
np.random.seed(42)
obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
- kwargs = {
- "trimming_threshold": 0.05,
- "n_folds": 5,
- "n_rep": n_rep,
- "draw_sample_splitting": False
- }
+ kwargs = {"trimming_threshold": 0.05, "n_folds": 5, "n_rep": n_rep, "draw_sample_splitting": False}
- smpls = DoubleMLResampling(
- n_folds=5,
- n_rep=n_rep,
- n_obs=n,
- stratify=obj_dml_data.d).split_samples()
+ smpls = DoubleMLResampling(n_folds=5, n_rep=n_rep, n_obs=n, stratify=obj_dml_data.d).split_samples()
# First stage estimation
ml_g = LinearRegression()
- ml_m = LogisticRegression(penalty='l2', random_state=42)
+ ml_m = LogisticRegression(penalty="l2", random_state=42)
# ATE with and without weights
- dml_irm_obj_ate_no_weights = dml.DoubleMLIRM(
- obj_dml_data,
- ml_g=clone(ml_g),
- ml_m=clone(ml_m),
- score='ATE',
- **kwargs)
+ dml_irm_obj_ate_no_weights = dml.DoubleMLIRM(obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), score="ATE", **kwargs)
dml_irm_obj_ate_no_weights.set_sample_splitting(smpls)
np.random.seed(42)
dml_irm_obj_ate_no_weights.fit()
dml_irm_obj_ate_weights = dml.DoubleMLIRM(
- obj_dml_data,
- ml_g=clone(ml_g),
- ml_m=clone(ml_m),
- score='ATE',
- weights=np.ones_like(obj_dml_data.y), **kwargs)
+ obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), score="ATE", weights=np.ones_like(obj_dml_data.y), **kwargs
+ )
dml_irm_obj_ate_weights.set_sample_splitting(smpls)
np.random.seed(42)
dml_irm_obj_ate_weights.fit()
# ATTE with and without weights
- dml_irm_obj_atte_no_weights = dml.DoubleMLIRM(
- obj_dml_data,
- ml_g=clone(ml_g),
- ml_m=clone(ml_m),
- score='ATTE',
- **kwargs)
+ dml_irm_obj_atte_no_weights = dml.DoubleMLIRM(obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), score="ATTE", **kwargs)
dml_irm_obj_atte_no_weights.set_sample_splitting(smpls)
np.random.seed(42)
dml_irm_obj_atte_no_weights.fit()
@@ -308,46 +304,43 @@ def dml_irm_weights_fixture(n_rep):
p_hat = obj_dml_data.d.mean()
weights = obj_dml_data.d / p_hat
weights_bar = m_hat / p_hat
- weight_dict = {'weights': weights, 'weights_bar': weights_bar}
+ weight_dict = {"weights": weights, "weights_bar": weights_bar}
dml_irm_obj_atte_weights = dml.DoubleMLIRM(
- obj_dml_data,
- ml_g=clone(ml_g),
- ml_m=clone(ml_m),
- score='ATE',
- weights=weight_dict, **kwargs)
+ obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), score="ATE", weights=weight_dict, **kwargs
+ )
dml_irm_obj_atte_weights.set_sample_splitting(smpls)
np.random.seed(42)
dml_irm_obj_atte_weights.fit()
res_dict = {
- 'coef_ate': dml_irm_obj_ate_no_weights.coef.item(),
- 'coef_ate_weights': dml_irm_obj_ate_weights.coef.item(),
- 'coef_atte': dml_irm_obj_atte_no_weights.coef.item(),
- 'coef_atte_weights': dml_irm_obj_atte_weights.coef.item(),
- 'se_ate': dml_irm_obj_ate_no_weights.se.item(),
- 'se_ate_weights': dml_irm_obj_ate_weights.se.item(),
- 'se_atte': dml_irm_obj_atte_no_weights.se.item(),
- 'se_atte_weights': dml_irm_obj_atte_weights.se.item(),
+ "coef_ate": dml_irm_obj_ate_no_weights.coef.item(),
+ "coef_ate_weights": dml_irm_obj_ate_weights.coef.item(),
+ "coef_atte": dml_irm_obj_atte_no_weights.coef.item(),
+ "coef_atte_weights": dml_irm_obj_atte_weights.coef.item(),
+ "se_ate": dml_irm_obj_ate_no_weights.se.item(),
+ "se_ate_weights": dml_irm_obj_ate_weights.se.item(),
+ "se_atte": dml_irm_obj_atte_no_weights.se.item(),
+ "se_atte_weights": dml_irm_obj_atte_weights.se.item(),
}
return res_dict
@pytest.mark.ci
def test_dml_irm_ate_weights(dml_irm_weights_fixture):
- assert math.isclose(dml_irm_weights_fixture['coef_ate'],
- dml_irm_weights_fixture['coef_ate_weights'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_irm_weights_fixture['se_ate'],
- dml_irm_weights_fixture['se_ate_weights'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_irm_weights_fixture["coef_ate"], dml_irm_weights_fixture["coef_ate_weights"], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_irm_weights_fixture["se_ate"], dml_irm_weights_fixture["se_ate_weights"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_irm_atte_weights(dml_irm_weights_fixture):
- assert math.isclose(dml_irm_weights_fixture['coef_atte'],
- dml_irm_weights_fixture['coef_atte_weights'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_irm_weights_fixture["coef_atte"], dml_irm_weights_fixture["coef_atte_weights"], rel_tol=1e-9, abs_tol=1e-4
+ )
    # Remark: the two scores differ slightly (Y instead of g(1, X) and the coefficient of theta)
- assert math.isclose(dml_irm_weights_fixture['se_atte'],
- dml_irm_weights_fixture['se_atte_weights'],
- rel_tol=1e-5, abs_tol=1e-3)
+ assert math.isclose(
+ dml_irm_weights_fixture["se_atte"], dml_irm_weights_fixture["se_atte_weights"], rel_tol=1e-5, abs_tol=1e-3
+ )
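
The weights fixture above rebuilds the ATTE estimate from the weighted ATE score: with p_hat = mean(D), passing weights = D / p_hat together with weights_bar = m_hat / p_hat to a `score="ATE"` model reproduces the ATTE coefficient (the tests compare at abs_tol=1e-4), while the standard errors agree only up to the score difference noted in the comment. A sketch of the weight construction on synthetic inputs:

    import numpy as np

    rng = np.random.default_rng(0)
    d = rng.integers(0, 2, size=1000).astype(float)      # treatment indicator
    m_hat = np.clip(rng.uniform(size=1000), 0.05, 0.95)  # propensity predictions

    p_hat = d.mean()
    weight_dict = {"weights": d / p_hat, "weights_bar": m_hat / p_hat}
    # pass via dml.DoubleMLIRM(..., score="ATE", weights=weight_dict)
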
diff --git a/doubleml/irm/tests/test_irm_classifier.py b/doubleml/irm/tests/test_irm_classifier.py
index 38e5a798b..9389439d3 100644
--- a/doubleml/irm/tests/test_irm_classifier.py
+++ b/doubleml/irm/tests/test_irm_classifier.py
@@ -1,48 +1,49 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_irm_manual import fit_irm, boot_irm
-
-
-@pytest.fixture(scope='module',
- params=[[LogisticRegression(solver='lbfgs', max_iter=250),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42)]])
+from ._utils_irm_manual import boot_irm, fit_irm
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LogisticRegression(solver="lbfgs", max_iter=250), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['ATE', 'ATTE'])
+@pytest.fixture(scope="module", params=["ATE", "ATTE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -57,62 +58,85 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ dml_irm_obj = dml.DoubleMLIRM(
+ obj_dml_data,
+ ml_g,
+ ml_m,
+ n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_irm_obj.set_sample_splitting(all_smpls=all_smpls)
dml_irm_obj.fit()
np.random.seed(3141)
- res_manual = fit_irm(y, x, d,
- clone(learner[0]), clone(learner[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_irm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_irm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_irm(
+ y,
+ x,
+ d,
+ clone(learner[0]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_irm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_irm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_irm(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_p_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_irm(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_p_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_irm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_irm_coef(dml_irm_classifier_fixture):
- assert math.isclose(dml_irm_classifier_fixture['coef'],
- dml_irm_classifier_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_irm_classifier_fixture["coef"], dml_irm_classifier_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_irm_se(dml_irm_classifier_fixture):
- assert math.isclose(dml_irm_classifier_fixture['se'],
- dml_irm_classifier_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_classifier_fixture["se"], dml_irm_classifier_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_boot(dml_irm_classifier_fixture):
- for bootstrap in dml_irm_classifier_fixture['boot_methods']:
- assert np.allclose(dml_irm_classifier_fixture['boot_t_stat' + bootstrap],
- dml_irm_classifier_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_irm_classifier_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_irm_classifier_fixture["boot_t_stat" + bootstrap],
+ dml_irm_classifier_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
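
`test_irm_classifier.py` runs the IRM with classifiers for both nuisance parts, which the package supports when the outcome itself is binary (the g-learner then predicts outcome probabilities). A minimal sketch on synthetic binary data, assuming this classifier-for-ml_g behavior:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    import doubleml as dml

    rng = np.random.default_rng(42)
    x = rng.normal(size=(500, 4))
    d = rng.integers(0, 2, size=500)
    y = rng.binomial(1, 1.0 / (1.0 + np.exp(-(0.5 * d + x[:, 0]))))  # binary outcome

    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
    dml_irm = dml.DoubleMLIRM(obj_dml_data, LogisticRegression(), LogisticRegression())
    dml_irm.fit()
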
diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py
index a7a2b6249..dabf6c0eb 100644
--- a/doubleml/irm/tests/test_irm_external_predictions.py
+++ b/doubleml/irm/tests/test_irm_external_predictions.py
@@ -1,10 +1,12 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression, LogisticRegression
-from doubleml import DoubleMLIRM, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLIRM
from doubleml.datasets import make_irm_data
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
@pytest.fixture(scope="module", params=["ATE", "ATTE"])
diff --git a/doubleml/irm/tests/test_irm_tune.py b/doubleml/irm/tests/test_irm_tune.py
index 21338bb43..7e995f938 100644
--- a/doubleml/irm/tests/test_irm_tune.py
+++ b/doubleml/irm/tests/test_irm_tune.py
@@ -1,64 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_irm_manual import fit_irm, boot_irm, tune_nuisance_irm
+from ._utils_irm_manual import boot_irm, fit_irm, tune_nuisance_irm
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression()])
+@pytest.fixture(scope="module", params=[LogisticRegression()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=['ATE', 'ATTE'])
+@pytest.fixture(scope="module", params=["ATE", "ATTE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-4, 2, 10)}
+ par_grid = {"C": np.logspace(-4, 2, 10)}
return par_grid
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_irm_fixture(generate_data_irm, learner_g, learner_m, score, normalize_ipw, tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -73,17 +66,12 @@ def dml_irm_fixture(generate_data_irm, learner_g, learner_m, score, normalize_ip
np.random.seed(3141)
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw)
+ dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw)
# synchronize the sample splitting
dml_irm_obj.set_sample_splitting(all_smpls=all_smpls)
np.random.seed(3141)
# tune hyperparameters
- tune_res = dml_irm_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_irm_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLIRM)
dml_irm_obj.fit()
@@ -92,68 +80,86 @@ def dml_irm_fixture(generate_data_irm, learner_g, learner_m, score, normalize_ip
smpls = all_smpls[0]
if tune_on_folds:
- g0_params, g1_params, m_params = tune_nuisance_irm(y, x, d,
- clone(learner_g), clone(learner_m), smpls, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_irm(
+ y, x, d, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g0_params, g1_params, m_params = tune_nuisance_irm(y, x, d,
- clone(learner_g), clone(learner_m), xx, score,
- n_folds_tune,
- par_grid['ml_g'], par_grid['ml_m'])
+ g0_params, g1_params, m_params = tune_nuisance_irm(
+ y, x, d, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+ )
g0_params = g0_params * n_folds
m_params = m_params * n_folds
- if score == 'ATE':
+ if score == "ATE":
g1_params = g1_params * n_folds
else:
- assert score == 'ATTE'
+ assert score == "ATTE"
g1_params = None
- res_manual = fit_irm(y, x, d, clone(learner_g), clone(learner_m),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- g0_params=g0_params, g1_params=g1_params, m_params=m_params)
-
- res_dict = {'coef': dml_irm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_irm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_irm(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ g0_params=g0_params,
+ g1_params=g1_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_irm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_irm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_irm(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_p_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_irm(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_p_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_irm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_irm_coef(dml_irm_fixture):
- assert math.isclose(dml_irm_fixture['coef'],
- dml_irm_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["coef"], dml_irm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_se(dml_irm_fixture):
- assert math.isclose(dml_irm_fixture['se'],
- dml_irm_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_fixture["se"], dml_irm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_boot(dml_irm_fixture):
- for bootstrap in dml_irm_fixture['boot_methods']:
- assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap],
- dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_irm_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_irm_fixture["boot_t_stat" + bootstrap],
+ dml_irm_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
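
The tuning calls above share one interface: `tune()` takes a per-learner grid, either tunes on each fold (`tune_on_folds=True`) or once on the full sample with the winning parameters reused on every fold (mirrored by the `* n_folds` replication in the manual code), and returns the model itself unless `return_tune_res=True`, in which case a list of tuning results is returned. A condensed usage sketch:

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LogisticRegression

    import doubleml as dml
    from doubleml.datasets import make_irm_data

    obj_dml_data = make_irm_data(n_obs=500, dim_x=5)
    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, RandomForestRegressor(), LogisticRegression())

    par_grid = {"ml_g": {"n_estimators": [5, 10, 20]}, "ml_m": {"C": np.logspace(-4, 2, 10)}}
    dml_irm_obj.tune(par_grid, tune_on_folds=True, n_folds_tune=4, return_tune_res=False)
    dml_irm_obj.fit()
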
diff --git a/doubleml/irm/tests/test_irm_weighted_scores.py b/doubleml/irm/tests/test_irm_weighted_scores.py
index 4bdd9bd34..a79844788 100644
--- a/doubleml/irm/tests/test_irm_weighted_scores.py
+++ b/doubleml/irm/tests/test_irm_weighted_scores.py
@@ -1,9 +1,8 @@
-import pytest
import numpy as np
-
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
import doubleml as dml
from doubleml.utils._estimation import _normalize_ipw
@@ -12,7 +11,7 @@
def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw):
# fraction of treated for ATTE
p_hat = None
- if score == 'ATTE':
+ if score == "ATTE":
p_hat = np.mean(d)
if normalize_ipw:
@@ -21,54 +20,56 @@ def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw):
# compute residuals
u_hat0 = y - g_hat0
u_hat1 = None
- if score == 'ATE':
+ if score == "ATE":
u_hat1 = y - g_hat1
psi_a = np.full_like(y, np.nan)
psi_b = np.full_like(y, np.nan)
- if score == 'ATE':
- psi_b = g_hat1 - g_hat0 \
- + np.divide(np.multiply(d, u_hat1), m_hat) \
- - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)
+ if score == "ATE":
+ psi_b = (
+ g_hat1 - g_hat0 + np.divide(np.multiply(d, u_hat1), m_hat) - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat)
+ )
psi_a = np.full_like(m_hat, -1.0)
else:
- assert score == 'ATTE'
- psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \
- - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)),
- np.multiply(p_hat, (1.0 - m_hat)))
- psi_a = - np.divide(d, p_hat)
+ assert score == "ATTE"
+ psi_b = np.divide(np.multiply(d, u_hat0), p_hat) - np.divide(
+ np.multiply(m_hat, np.multiply(1.0 - d, u_hat0)), np.multiply(p_hat, (1.0 - m_hat))
+ )
+ psi_a = -np.divide(d, p_hat)
return psi_a, psi_b
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
- RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [
+ RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+ RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+ ],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['ATE', 'ATTE'])
+@pytest.fixture(scope="module", params=["ATE", "ATTE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[False, True])
+@pytest.fixture(scope="module", params=[False, True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.2, 0.15])
+@pytest.fixture(scope="module", params=[0.2, 0.15])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold):
n_folds = 2
@@ -81,51 +82,45 @@ def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_i
ml_m = clone(learner[1])
np.random.seed(3141)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ dml_irm_obj = dml.DoubleMLIRM(
+ obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold
+ )
dml_irm_obj.fit()
# old score
psi_a_old, psi_b_old = old_score_elements(
y=y,
d=d,
- g_hat0=np.squeeze(dml_irm_obj.predictions['ml_g0']),
- g_hat1=np.squeeze(dml_irm_obj.predictions['ml_g1']),
- m_hat=np.squeeze(dml_irm_obj.predictions['ml_m']),
+ g_hat0=np.squeeze(dml_irm_obj.predictions["ml_g0"]),
+ g_hat1=np.squeeze(dml_irm_obj.predictions["ml_g1"]),
+ m_hat=np.squeeze(dml_irm_obj.predictions["ml_m"]),
score=score,
- normalize_ipw=normalize_ipw
+ normalize_ipw=normalize_ipw,
)
old_coef = -np.mean(psi_b_old) / np.mean(psi_a_old)
result_dict = {
- 'psi_a': np.squeeze(dml_irm_obj.psi_elements['psi_a']),
- 'psi_b': np.squeeze(dml_irm_obj.psi_elements['psi_b']),
- 'psi_a_old': psi_a_old,
- 'psi_b_old': psi_b_old,
- 'coef': np.squeeze(dml_irm_obj.coef),
- 'old_coef': old_coef,
+ "psi_a": np.squeeze(dml_irm_obj.psi_elements["psi_a"]),
+ "psi_b": np.squeeze(dml_irm_obj.psi_elements["psi_b"]),
+ "psi_a_old": psi_a_old,
+ "psi_b_old": psi_b_old,
+ "coef": np.squeeze(dml_irm_obj.coef),
+ "old_coef": old_coef,
}
return result_dict
@pytest.mark.ci
def test_irm_old_vs_weighted_score_psi_b(old_vs_weighted_score_fixture):
- assert np.allclose(old_vs_weighted_score_fixture['psi_b'],
- old_vs_weighted_score_fixture['psi_b_old'])
+ assert np.allclose(old_vs_weighted_score_fixture["psi_b"], old_vs_weighted_score_fixture["psi_b_old"])
@pytest.mark.ci
def test_irm_old_vs_weighted_score_psi_a(old_vs_weighted_score_fixture):
- assert np.allclose(old_vs_weighted_score_fixture['psi_a'],
- old_vs_weighted_score_fixture['psi_a_old'])
+ assert np.allclose(old_vs_weighted_score_fixture["psi_a"], old_vs_weighted_score_fixture["psi_a_old"])
@pytest.mark.ci
def test_irm_old_vs_weighted_coef(old_vs_weighted_score_fixture):
- assert np.allclose(old_vs_weighted_score_fixture['coef'],
- old_vs_weighted_score_fixture['old_coef'])
+ assert np.allclose(old_vs_weighted_score_fixture["coef"], old_vs_weighted_score_fixture["old_coef"])
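
For reference, `old_score_elements` above implements the textbook AIPW moment conditions against which the weighted score is checked. In the code's notation (g for g_hat, m for m_hat, p for p_hat = mean(D)), the score psi = psi_a * theta + psi_b reads

    $$\psi^{\mathrm{ATE}} = g(1,X) - g(0,X) + \frac{D\,(Y - g(1,X))}{m(X)} - \frac{(1-D)\,(Y - g(0,X))}{1 - m(X)} - \theta$$

    $$\psi^{\mathrm{ATTE}} = \frac{D\,(Y - g(0,X))}{p} - \frac{m(X)\,(1-D)\,(Y - g(0,X))}{p\,(1 - m(X))} - \frac{D}{p}\,\theta$$

and the estimate solves the empirical moment condition, theta_hat = -mean(psi_b) / mean(psi_a), which is exactly `old_coef` above.
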
diff --git a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py
index 18eac7ef7..a6c30cae8 100644
--- a/doubleml/irm/tests/test_irm_with_missings.py
+++ b/doubleml/irm/tests/test_irm_with_missings.py
@@ -1,58 +1,55 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
# TODO: Maybe add a learner that cannot handle missing values in x and test the exception
-from sklearn.linear_model import LogisticRegression, LinearRegression
+from sklearn.linear_model import LinearRegression, LogisticRegression
from xgboost import XGBClassifier, XGBRegressor
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_irm_manual import fit_irm, boot_irm
-
-
-@pytest.fixture(scope='module',
- params=[[XGBRegressor(n_jobs=1, objective="reg:squarederror",
- eta=0.1, n_estimators=10),
- XGBClassifier(n_jobs=1,
- objective="binary:logistic", eval_metric="logloss",
- eta=0.1, n_estimators=10)]])
+from ._utils_irm_manual import boot_irm, fit_irm
+
+
+@pytest.fixture(
+ scope="module",
+ params=[
+ [
+ XGBRegressor(n_jobs=1, objective="reg:squarederror", eta=0.1, n_estimators=10),
+ XGBClassifier(n_jobs=1, objective="binary:logistic", eval_metric="logloss", eta=0.1, n_estimators=10),
+ ]
+ ],
+)
def learner_xgboost(request):
return request.param
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)]])
+@pytest.fixture(scope="module", params=[[LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)]])
def learner_sklearn(request):
return request.param
-@pytest.fixture(scope='module',
- params=['ATE', 'ATTE'])
+@pytest.fixture(scope="module", params=["ATE", "ATTE"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
-def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score,
- normalize_ipw, trimming_threshold):
- boot_methods = ['normal']
+@pytest.fixture(scope="module")
+def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, trimming_threshold):
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 499
@@ -67,68 +64,83 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco
ml_m = clone(learner_xgboost[1])
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d,
- force_all_x_finite='allow-nan')
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
+ obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
+ dml_irm_obj = dml.DoubleMLIRM(
+ obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold
+ )
# synchronize the sample splitting
dml_irm_obj.set_sample_splitting(all_smpls=all_smpls)
np.random.seed(3141)
dml_irm_obj.fit()
np.random.seed(3141)
- res_manual = fit_irm(y, x, d,
- clone(learner_xgboost[0]), clone(learner_xgboost[1]),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_irm_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_irm_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_irm(
+ y,
+ x,
+ d,
+ clone(learner_xgboost[0]),
+ clone(learner_xgboost[1]),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_irm_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_irm_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_irm(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_g_hat0'], res_manual['all_g_hat1'],
- res_manual['all_m_hat'], res_manual['all_p_hat'],
- all_smpls, score, bootstrap, n_rep_boot,
- normalize_ipw=normalize_ipw)
+ boot_t_stat = boot_irm(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_g_hat0"],
+ res_manual["all_g_hat1"],
+ res_manual["all_m_hat"],
+ res_manual["all_p_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ normalize_ipw=normalize_ipw,
+ )
np.random.seed(3141)
dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_irm_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_irm_w_missing_coef(dml_irm_w_missing_fixture):
- assert math.isclose(dml_irm_w_missing_fixture['coef'],
- dml_irm_w_missing_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_irm_w_missing_fixture["coef"], dml_irm_w_missing_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_irm_w_missing_se(dml_irm_w_missing_fixture):
- assert math.isclose(dml_irm_w_missing_fixture['se'],
- dml_irm_w_missing_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_irm_w_missing_fixture["se"], dml_irm_w_missing_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_irm_w_missing_boot(dml_irm_w_missing_fixture):
- for bootstrap in dml_irm_w_missing_fixture['boot_methods']:
- assert np.allclose(dml_irm_w_missing_fixture['boot_t_stat' + bootstrap],
- dml_irm_w_missing_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_irm_w_missing_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_irm_w_missing_fixture["boot_t_stat" + bootstrap],
+ dml_irm_w_missing_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
def test_irm_exception_with_missings(generate_data_irm_w_missings, learner_sklearn):
@@ -140,10 +152,8 @@ def test_irm_exception_with_missings(generate_data_irm_w_missings, learner_sklea
ml_m = clone(learner_sklearn[1])
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d,
- force_all_x_finite='allow-nan')
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m)
+ obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
+ dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m)
msg = r"Input X contains NaN.\nLinearRegression does not accept missing values encoded as NaN natively."
with pytest.raises(ValueError, match=msg):
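
The missing-value tests hinge on `force_all_x_finite="allow-nan"`: it relaxes the input checks so NaNs may remain in X, leaving it to the learner to cope with them. XGBoost handles missings natively, while the scikit-learn pair fails at fit time with the ValueError matched above. A small sketch, assuming an XGBoost version with native NaN support:

    import numpy as np
    from xgboost import XGBClassifier, XGBRegressor

    import doubleml as dml

    rng = np.random.default_rng(0)
    x_full = rng.normal(size=(500, 4))
    d = rng.integers(0, 2, size=500)
    y = d + 0.5 * x_full[:, 0] + rng.normal(size=500)
    x = x_full.copy()
    x[rng.uniform(size=x.shape) < 0.1] = np.nan  # sprinkle missings into X only

    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
    dml_irm = dml.DoubleMLIRM(obj_dml_data, XGBRegressor(n_estimators=10), XGBClassifier(n_estimators=10))
    dml_irm.fit()  # works because XGBoost accepts NaN inputs
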
diff --git a/doubleml/irm/tests/test_lpq.py b/doubleml/irm/tests/test_lpq.py
index 089d9d5ce..3e0049b80 100644
--- a/doubleml/irm/tests/test_lpq.py
+++ b/doubleml/irm/tests/test_lpq.py
@@ -1,64 +1,57 @@
-import numpy as np
-import pytest
import math
-import doubleml as dml
-
+import numpy as np
+import pytest
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from statsmodels.nonparametric.kde import KDEUnivariate
+import doubleml as dml
+
from ...tests._utils import draw_smpls
-from ._utils_lpq_manual import fit_lpq
from ...utils._estimation import _default_kde
+from ._utils_lpq_manual import fit_lpq
def custom_kde(u, weights):
dens = KDEUnivariate(u)
- dens.fit(kernel='epa', bw='silverman', weights=weights, fft=False)
+ dens.fit(kernel="epa", bw="silverman", weights=weights, fft=False)
return dens.evaluate(0)
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.25, 0.75])
+@pytest.fixture(scope="module", params=[0.25, 0.75])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression()])
+@pytest.fixture(scope="module", params=[LogisticRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05])
+@pytest.fixture(scope="module", params=[0.05])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module',
- params=['default', custom_kde])
+@pytest.fixture(scope="module", params=["default", custom_kde])
def kde(request):
return request.param
@pytest.fixture(scope="module")
-def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
- normalize_ipw, trimming_threshold, kde):
+def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold, kde):
n_folds = 3
# collect data
@@ -70,64 +63,90 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata)
np.random.seed(42)
- if kde == 'default':
- dml_lpq_obj = dml.DoubleMLLPQ(obj_dml_data,
- clone(learner), clone(learner),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ if kde == "default":
+ dml_lpq_obj = dml.DoubleMLLPQ(
+ obj_dml_data,
+ clone(learner),
+ clone(learner),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_lpq_obj.set_sample_splitting(all_smpls=all_smpls)
dml_lpq_obj.fit()
np.random.seed(42)
- res_manual = fit_lpq(y, x, d, z, quantile, clone(learner), clone(learner),
- all_smpls, treatment,
- normalize_ipw=normalize_ipw, kde=_default_kde,
- n_rep=1, trimming_threshold=trimming_threshold)
+ res_manual = fit_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile,
+ clone(learner),
+ clone(learner),
+ all_smpls,
+ treatment,
+ normalize_ipw=normalize_ipw,
+ kde=_default_kde,
+ n_rep=1,
+ trimming_threshold=trimming_threshold,
+ )
else:
- dml_lpq_obj = dml.DoubleMLLPQ(obj_dml_data,
- clone(learner), clone(learner),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- kde=kde,
- trimming_threshold=trimming_threshold,
- draw_sample_splitting=False)
+ dml_lpq_obj = dml.DoubleMLLPQ(
+ obj_dml_data,
+ clone(learner),
+ clone(learner),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ kde=kde,
+ trimming_threshold=trimming_threshold,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_lpq_obj.set_sample_splitting(all_smpls=all_smpls)
dml_lpq_obj.fit()
np.random.seed(42)
- res_manual = fit_lpq(y, x, d, z, quantile, clone(learner), clone(learner),
- all_smpls, treatment,
- normalize_ipw=normalize_ipw, kde=kde,
- n_rep=1, trimming_threshold=trimming_threshold)
-
- res_dict = {'coef': dml_lpq_obj.coef.item(),
- 'coef_manual': res_manual['lpq'],
- 'se': dml_lpq_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile,
+ clone(learner),
+ clone(learner),
+ all_smpls,
+ treatment,
+ normalize_ipw=normalize_ipw,
+ kde=kde,
+ n_rep=1,
+ trimming_threshold=trimming_threshold,
+ )
+
+ res_dict = {
+ "coef": dml_lpq_obj.coef.item(),
+ "coef_manual": res_manual["lpq"],
+ "se": dml_lpq_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_lpq_coef(dml_lpq_fixture):
- assert math.isclose(dml_lpq_fixture['coef'],
- dml_lpq_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_lpq_fixture["coef"], dml_lpq_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_lpq_se(dml_lpq_fixture):
- assert math.isclose(dml_lpq_fixture['se'],
- dml_lpq_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_lpq_fixture["se"], dml_lpq_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
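
The `kde` argument of `DoubleMLLPQ` accepts any callable with the signature `kde(u, weights)` that returns the weighted density estimate of the score residuals u at zero; `_default_kde` is the package default and the statsmodels-based `custom_kde` above is a drop-in replacement. A quick standalone check of the callable contract:

    import numpy as np
    from statsmodels.nonparametric.kde import KDEUnivariate

    def custom_kde(u, weights):
        # same callable as in the test above: weighted density of u at zero
        dens = KDEUnivariate(u)
        dens.fit(kernel="epa", bw="silverman", weights=weights, fft=False)
        return dens.evaluate(0)

    rng = np.random.default_rng(0)
    u = rng.normal(size=1000)
    print(custom_kde(u, np.ones_like(u)))  # roughly the N(0,1) density at 0, ~0.40
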
diff --git a/doubleml/irm/tests/test_lpq_external_predictions.py b/doubleml/irm/tests/test_lpq_external_predictions.py
index eb0a468a5..66f2ece6e 100644
--- a/doubleml/irm/tests/test_lpq_external_predictions.py
+++ b/doubleml/irm/tests/test_lpq_external_predictions.py
@@ -1,10 +1,13 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LogisticRegression
-from doubleml import DoubleMLLPQ, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLLPQ
from doubleml.datasets import make_iivm_data
from doubleml.utils import DMLDummyClassifier
+
from ...tests._utils import draw_smpls
@@ -56,10 +59,7 @@ def doubleml_lpq_fixture(n_rep, normalize_ipw):
np.random.seed(3141)
dml_lpq_ext.fit(external_predictions=ext_predictions)
- res_dict = {
- "coef_normal": dml_lpq.coef.item(),
- "coef_ext": dml_lpq_ext.coef.item()
- }
+ res_dict = {"coef_normal": dml_lpq.coef.item(), "coef_ext": dml_lpq_ext.coef.item()}
return res_dict
diff --git a/doubleml/irm/tests/test_lpq_tune.py b/doubleml/irm/tests/test_lpq_tune.py
index 7cf1cdf96..c2b7d1923 100644
--- a/doubleml/irm/tests/test_lpq_tune.py
+++ b/doubleml/irm/tests/test_lpq_tune.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
@@ -11,50 +11,46 @@
from ._utils_lpq_manual import fit_lpq, tune_nuisance_lpq
-@pytest.fixture(scope='module',
- params=[0])
+@pytest.fixture(scope="module", params=[0])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.5])
+@pytest.fixture(scope="module", params=[0.5])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=2, n_estimators=5, random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestClassifier(max_depth=2, n_estimators=5, random_state=42)])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True])
+@pytest.fixture(scope="module", params=[True])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestClassifier]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
return par_grid
-@pytest.fixture(scope='module')
-def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw,
- tune_on_folds):
- par_grid = {'ml_m_z': get_par_grid(learner),
- 'ml_m_d_z0': get_par_grid(learner),
- 'ml_m_d_z1': get_par_grid(learner),
- 'ml_g_du_z0': get_par_grid(learner),
- 'ml_g_du_z1': get_par_grid(learner)}
+@pytest.fixture(scope="module")
+def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, tune_on_folds):
+ par_grid = {
+ "ml_m_z": get_par_grid(learner),
+ "ml_m_d_z0": get_par_grid(learner),
+ "ml_m_d_z1": get_par_grid(learner),
+ "ml_g_du_z0": get_par_grid(learner),
+ "ml_g_du_z1": get_par_grid(learner),
+ }
n_folds_tune = 4
n_folds = 2
@@ -68,15 +64,18 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
smpls = all_smpls[0]
np.random.seed(42)
- dml_lpq_obj = dml.DoubleMLLPQ(obj_dml_data,
- clone(learner), clone(learner),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- trimming_threshold=0.01,
- draw_sample_splitting=False)
+ dml_lpq_obj = dml.DoubleMLLPQ(
+ obj_dml_data,
+ clone(learner),
+ clone(learner),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=0.01,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_lpq_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -90,26 +89,48 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
np.random.seed(42)
if tune_on_folds:
- m_z_params, m_d_z0_params, m_d_z1_params, \
- g_du_z0_params, g_du_z1_params = tune_nuisance_lpq(y, x, d, z,
- clone(learner),
- clone(learner), clone(learner),
- clone(learner), clone(learner),
- smpls, treatment, quantile, n_folds_tune,
- par_grid['ml_m_z'],
- par_grid['ml_m_d_z0'], par_grid['ml_m_d_z1'],
- par_grid['ml_g_du_z0'], par_grid['ml_g_du_z1'])
+ m_z_params, m_d_z0_params, m_d_z1_params, g_du_z0_params, g_du_z1_params = tune_nuisance_lpq(
+ y,
+ x,
+ d,
+ z,
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ smpls,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_m_z"],
+ par_grid["ml_m_d_z0"],
+ par_grid["ml_m_d_z1"],
+ par_grid["ml_g_du_z0"],
+ par_grid["ml_g_du_z1"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- m_z_params, m_d_z0_params, m_d_z1_params, \
- g_du_z0_params, g_du_z1_params = tune_nuisance_lpq(y, x, d, z,
- clone(learner),
- clone(learner), clone(learner),
- clone(learner), clone(learner),
- xx, treatment, quantile, n_folds_tune,
- par_grid['ml_m_z'],
- par_grid['ml_m_d_z0'], par_grid['ml_m_d_z1'],
- par_grid['ml_g_du_z0'], par_grid['ml_g_du_z1'])
+ m_z_params, m_d_z0_params, m_d_z1_params, g_du_z0_params, g_du_z1_params = tune_nuisance_lpq(
+ y,
+ x,
+ d,
+ z,
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ clone(learner),
+ xx,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_m_z"],
+ par_grid["ml_m_d_z0"],
+ par_grid["ml_m_d_z1"],
+ par_grid["ml_g_du_z0"],
+ par_grid["ml_g_du_z1"],
+ )
m_z_params = m_z_params * n_folds
m_d_z0_params = m_d_z0_params * n_folds
@@ -118,35 +139,41 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
g_du_z1_params = g_du_z1_params * n_folds
np.random.seed(42)
- res_manual = fit_lpq(y, x, d, z,
- quantile=quantile,
- learner_g=clone(learner),
- learner_m=clone(learner),
- all_smpls=all_smpls,
- treatment=treatment,
- n_rep=1, trimming_threshold=0.01,
- normalize_ipw=normalize_ipw,
- m_z_params=m_z_params,
- m_d_z0_params=m_d_z0_params, m_d_z1_params=m_d_z1_params,
- g_du_z0_params=g_du_z0_params, g_du_z1_params=g_du_z1_params)
-
- res_dict = {'coef': dml_lpq_obj.coef.item(),
- 'coef_manual': res_manual['lpq'],
- 'se': dml_lpq_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_lpq(
+ y,
+ x,
+ d,
+ z,
+ quantile=quantile,
+ learner_g=clone(learner),
+ learner_m=clone(learner),
+ all_smpls=all_smpls,
+ treatment=treatment,
+ n_rep=1,
+ trimming_threshold=0.01,
+ normalize_ipw=normalize_ipw,
+ m_z_params=m_z_params,
+ m_d_z0_params=m_d_z0_params,
+ m_d_z1_params=m_d_z1_params,
+ g_du_z0_params=g_du_z0_params,
+ g_du_z1_params=g_du_z1_params,
+ )
+
+ res_dict = {
+ "coef": dml_lpq_obj.coef.item(),
+ "coef_manual": res_manual["lpq"],
+ "se": dml_lpq_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_lpq_coef(dml_lpq_fixture):
- assert math.isclose(dml_lpq_fixture['coef'],
- dml_lpq_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_lpq_fixture["coef"], dml_lpq_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_lpq_se(dml_lpq_fixture):
- assert math.isclose(dml_lpq_fixture['se'],
- dml_lpq_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_lpq_fixture["se"], dml_lpq_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_pq.py b/doubleml/irm/tests/test_pq.py
index 0447daa0b..62e69d532 100644
--- a/doubleml/irm/tests/test_pq.py
+++ b/doubleml/irm/tests/test_pq.py
@@ -1,51 +1,46 @@
-import numpy as np
-import pytest
import math
-import doubleml as dml
-
+import numpy as np
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+
+import doubleml as dml
from ...tests._utils import draw_smpls
from ._utils_pq_manual import fit_pq
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.25, 0.5, 0.75])
+@pytest.fixture(scope="module", params=[0.25, 0.5, 0.75])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
- LogisticRegression()])
+@pytest.fixture(
+ scope="module", params=[RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42), LogisticRegression()]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
@pytest.fixture(scope="module")
-def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner,
- normalize_ipw, trimming_threshold):
+def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold):
n_folds = 3
# collect data
@@ -56,15 +51,18 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner,
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
np.random.seed(42)
- dml_pq_obj = dml.DoubleMLPQ(obj_dml_data,
- clone(learner), clone(learner),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- trimming_threshold=trimming_threshold,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ dml_pq_obj = dml.DoubleMLPQ(
+ obj_dml_data,
+ clone(learner),
+ clone(learner),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ trimming_threshold=trimming_threshold,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_pq_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -72,30 +70,35 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner,
dml_pq_obj.fit()
np.random.seed(42)
- res_manual = fit_pq(y, x, d, quantile,
- clone(learner), clone(learner),
- all_smpls, treatment,
- n_rep=1,
- trimming_threshold=trimming_threshold,
- normalize_ipw=normalize_ipw)
-
- res_dict = {'coef': dml_pq_obj.coef.item(),
- 'coef_manual': res_manual['pq'],
- 'se': dml_pq_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_pq(
+ y,
+ x,
+ d,
+ quantile,
+ clone(learner),
+ clone(learner),
+ all_smpls,
+ treatment,
+ n_rep=1,
+ trimming_threshold=trimming_threshold,
+ normalize_ipw=normalize_ipw,
+ )
+
+ res_dict = {
+ "coef": dml_pq_obj.coef.item(),
+ "coef_manual": res_manual["pq"],
+ "se": dml_pq_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_pq_coef(dml_pq_fixture):
- assert math.isclose(dml_pq_fixture['coef'],
- dml_pq_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pq_fixture["coef"], dml_pq_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pq_se(dml_pq_fixture):
- assert math.isclose(dml_pq_fixture['se'],
- dml_pq_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pq_fixture["se"], dml_pq_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_pq_external_predictions.py b/doubleml/irm/tests/test_pq_external_predictions.py
index 3bbc0c56b..28f8ec660 100644
--- a/doubleml/irm/tests/test_pq_external_predictions.py
+++ b/doubleml/irm/tests/test_pq_external_predictions.py
@@ -1,10 +1,13 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LogisticRegression
-from doubleml import DoubleMLPQ, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLPQ
from doubleml.datasets import make_irm_data
from doubleml.utils import DMLDummyClassifier
+
from ...tests._utils import draw_smpls
@@ -80,11 +83,7 @@ def doubleml_pq_fixture(n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext):
tol_rel = 1e-9
tol_abs = 1e-4
- res_dict = {
- "coef_normal": dml_pq.coef.item(),
- "coef_ext": dml_pq_ext.coef.item(),
- "tol_rel": tol_rel, "tol_abs":
- tol_abs}
+ res_dict = {"coef_normal": dml_pq.coef.item(), "coef_ext": dml_pq_ext.coef.item(), "tol_rel": tol_rel, "tol_abs": tol_abs}
return res_dict
diff --git a/doubleml/irm/tests/test_pq_tune.py b/doubleml/irm/tests/test_pq_tune.py
index 2459b99ab..322b93fd7 100644
--- a/doubleml/irm/tests/test_pq_tune.py
+++ b/doubleml/irm/tests/test_pq_tune.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
@@ -11,53 +11,45 @@
from ._utils_pq_manual import fit_pq, tune_nuisance_pq
-@pytest.fixture(scope='module',
- params=[0, 1])
+@pytest.fixture(scope="module", params=[0, 1])
def treatment(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.25, 0.5, 0.75])
+@pytest.fixture(scope="module", params=[0.25, 0.5, 0.75])
def quantile(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=5, random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestClassifier(max_depth=5, random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=5, random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestClassifier(max_depth=5, random_state=42)])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestClassifier]:
- par_grid = {'n_estimators': [5, 10, 15, 20]}
+ par_grid = {"n_estimators": [5, 10, 15, 20]}
return par_grid
-@pytest.fixture(scope='module')
-def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, learner_m, normalize_ipw,
- tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_m': get_par_grid(learner_m)}
+@pytest.fixture(scope="module")
+def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, learner_m, normalize_ipw, tune_on_folds):
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
n_folds = 2
@@ -70,15 +62,18 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear
smpls = all_smpls[0]
np.random.seed(42)
- dml_pq_obj = dml.DoubleMLPQ(obj_dml_data,
- clone(learner_g), clone(learner_m),
- treatment=treatment,
- quantile=quantile,
- n_folds=n_folds,
- n_rep=1,
- normalize_ipw=normalize_ipw,
- trimming_threshold=0.01,
- draw_sample_splitting=False)
+ dml_pq_obj = dml.DoubleMLPQ(
+ obj_dml_data,
+ clone(learner_g),
+ clone(learner_m),
+ treatment=treatment,
+ quantile=quantile,
+ n_folds=n_folds,
+ n_rep=1,
+ normalize_ipw=normalize_ipw,
+ trimming_threshold=0.01,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
dml_pq_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -92,47 +87,70 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear
np.random.seed(42)
if tune_on_folds:
- g_params, m_params = tune_nuisance_pq(y, x, d,
- clone(learner_g), clone(learner_m),
- smpls, treatment, quantile,
- n_folds_tune, par_grid['ml_g'], par_grid['ml_m'])
+ g_params, m_params = tune_nuisance_pq(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ smpls,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- g_params, m_params = tune_nuisance_pq(y, x, d,
- clone(learner_g), clone(learner_m),
- xx, treatment, quantile,
- n_folds_tune, par_grid['ml_g'], par_grid['ml_m'])
+ g_params, m_params = tune_nuisance_pq(
+ y,
+ x,
+ d,
+ clone(learner_g),
+ clone(learner_m),
+ xx,
+ treatment,
+ quantile,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_m"],
+ )
g_params = g_params * n_folds
m_params = m_params * n_folds
np.random.seed(42)
- res_manual = fit_pq(y, x, d, quantile,
- learner_g=clone(learner_g),
- learner_m=clone(learner_m),
- all_smpls=all_smpls,
- treatment=treatment,
- n_rep=1, trimming_threshold=0.01,
- normalize_ipw=normalize_ipw,
- g_params=g_params, m_params=m_params)
-
- res_dict = {'coef': dml_pq_obj.coef.item(),
- 'coef_manual': res_manual['pq'],
- 'se': dml_pq_obj.se.item(),
- 'se_manual': res_manual['se']}
+ res_manual = fit_pq(
+ y,
+ x,
+ d,
+ quantile,
+ learner_g=clone(learner_g),
+ learner_m=clone(learner_m),
+ all_smpls=all_smpls,
+ treatment=treatment,
+ n_rep=1,
+ trimming_threshold=0.01,
+ normalize_ipw=normalize_ipw,
+ g_params=g_params,
+ m_params=m_params,
+ )
+
+ res_dict = {
+ "coef": dml_pq_obj.coef.item(),
+ "coef_manual": res_manual["pq"],
+ "se": dml_pq_obj.se.item(),
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_pq_coef(dml_pq_fixture):
- assert math.isclose(dml_pq_fixture['coef'],
- dml_pq_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pq_fixture["coef"], dml_pq_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pq_se(dml_pq_fixture):
- assert math.isclose(dml_pq_fixture['se'],
- dml_pq_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pq_fixture["se"], dml_pq_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_qte.py b/doubleml/irm/tests/test_qte.py
index 7c7b8c1df..0557c85b8 100644
--- a/doubleml/irm/tests/test_qte.py
+++ b/doubleml/irm/tests/test_qte.py
@@ -1,41 +1,37 @@
+import copy
+
import numpy as np
import pandas as pd
import pytest
-import copy
-
-import doubleml as dml
-
from sklearn.base import clone
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
-from ...tests._utils import draw_smpls, confint_manual
-from ._utils_qte_manual import fit_qte, boot_qte
-
+import doubleml as dml
from doubleml.datasets import make_irm_data
-from ...utils._estimation import _default_kde
+from ...tests._utils import confint_manual, draw_smpls
+from ...utils._estimation import _default_kde
+from ._utils_qte_manual import boot_qte, fit_qte
quantiles = [0.25, 0.5, 0.75]
n_quantiles = len(quantiles)
n_rep = 1
-@pytest.fixture(scope='module',
- params=[RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42),
- LogisticRegression()])
+@pytest.fixture(
+ scope="module", params=[RandomForestClassifier(max_depth=2, n_estimators=10, random_state=42), LogisticRegression()]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[None, _default_kde])
+@pytest.fixture(scope="module", params=[None, _default_kde])
def kde(request):
return request.param
@@ -43,7 +39,7 @@ def kde(request):
@pytest.fixture(scope="module")
def dml_qte_fixture(generate_data_quantiles, learner, normalize_ipw, kde):
n_folds = 3
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_rep_boot = 2
# collect data
@@ -60,26 +56,17 @@ def dml_qte_fixture(generate_data_quantiles, learner, normalize_ipw, kde):
"n_rep": n_rep,
"normalize_ipw": normalize_ipw,
"trimming_threshold": 1e-12,
- "kde": kde
+ "kde": kde,
}
np.random.seed(42)
- dml_qte_obj = dml.DoubleMLQTE(
- obj_dml_data,
- ml_g, ml_m,
- **input_args
- )
+ dml_qte_obj = dml.DoubleMLQTE(obj_dml_data, ml_g, ml_m, **input_args)
unfitted_qte_model = copy.copy(dml_qte_obj)
np.random.seed(42)
dml_qte_obj.fit()
np.random.seed(42)
- dml_qte_obj_ext_smpls = dml.DoubleMLQTE(
- obj_dml_data,
- ml_g, ml_m,
- draw_sample_splitting=False,
- **input_args
- )
+ dml_qte_obj_ext_smpls = dml.DoubleMLQTE(obj_dml_data, ml_g, ml_m, draw_sample_splitting=False, **input_args)
dml_qte_obj_ext_smpls.set_sample_splitting(dml_qte_obj.smpls)
np.random.seed(42)
dml_qte_obj_ext_smpls.fit()
@@ -87,95 +74,103 @@ def dml_qte_fixture(generate_data_quantiles, learner, normalize_ipw, kde):
np.random.seed(42)
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
- res_manual = fit_qte(y, x, d, quantiles, ml_g, ml_g, all_smpls,
- n_rep=n_rep,
- normalize_ipw=normalize_ipw,
- trimming_rule='truncate', trimming_threshold=1e-12, kde=kde,
- draw_sample_splitting=True)
+ res_manual = fit_qte(
+ y,
+ x,
+ d,
+ quantiles,
+ ml_g,
+ ml_g,
+ all_smpls,
+ n_rep=n_rep,
+ normalize_ipw=normalize_ipw,
+ trimming_rule="truncate",
+ trimming_threshold=1e-12,
+ kde=kde,
+ draw_sample_splitting=True,
+ )
ci = dml_qte_obj.confint(joint=False, level=0.95)
- ci_manual = confint_manual(res_manual['qte'], res_manual['se'], quantiles,
- boot_t_stat=None, joint=False, level=0.95)
- res_dict = {'coef': dml_qte_obj.coef,
- 'coef_manual': res_manual['qte'],
- 'coef_ext_smpls': dml_qte_obj_ext_smpls.coef,
- 'se': dml_qte_obj.se,
- 'se_manual': res_manual['se'],
- 'se_ext_smpls': dml_qte_obj_ext_smpls.se,
- 'boot_methods': boot_methods,
- 'ci': ci.to_numpy(),
- 'ci_manual': ci_manual.to_numpy(),
- 'qte_model': dml_qte_obj,
- 'unfitted_qte_model': unfitted_qte_model}
+ ci_manual = confint_manual(res_manual["qte"], res_manual["se"], quantiles, boot_t_stat=None, joint=False, level=0.95)
+ res_dict = {
+ "coef": dml_qte_obj.coef,
+ "coef_manual": res_manual["qte"],
+ "coef_ext_smpls": dml_qte_obj_ext_smpls.coef,
+ "se": dml_qte_obj.se,
+ "se_manual": res_manual["se"],
+ "se_ext_smpls": dml_qte_obj_ext_smpls.se,
+ "boot_methods": boot_methods,
+ "ci": ci.to_numpy(),
+ "ci_manual": ci_manual.to_numpy(),
+ "qte_model": dml_qte_obj,
+ "unfitted_qte_model": unfitted_qte_model,
+ }
for bootstrap in boot_methods:
np.random.seed(42)
- boot_t_stat = boot_qte(res_manual['scaled_scores'], res_manual['ses'], quantiles,
- all_smpls, n_rep, bootstrap, n_rep_boot)
+ boot_t_stat = boot_qte(
+ res_manual["scaled_scores"], res_manual["ses"], quantiles, all_smpls, n_rep, bootstrap, n_rep_boot
+ )
np.random.seed(42)
dml_qte_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat_' + bootstrap] = dml_qte_obj.boot_t_stat
- res_dict['boot_t_stat_' + bootstrap + '_manual'] = boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap] = dml_qte_obj.boot_t_stat
+ res_dict["boot_t_stat_" + bootstrap + "_manual"] = boot_t_stat
ci = dml_qte_obj.confint(joint=True, level=0.95)
- ci_manual = confint_manual(res_manual['qte'], res_manual['se'], quantiles,
- boot_t_stat=boot_t_stat, joint=True, level=0.95)
- res_dict['boot_ci_' + bootstrap] = ci.to_numpy()
- res_dict['boot_ci_' + bootstrap + '_manual'] = ci_manual.to_numpy()
+ ci_manual = confint_manual(
+ res_manual["qte"], res_manual["se"], quantiles, boot_t_stat=boot_t_stat, joint=True, level=0.95
+ )
+ res_dict["boot_ci_" + bootstrap] = ci.to_numpy()
+ res_dict["boot_ci_" + bootstrap + "_manual"] = ci_manual.to_numpy()
return res_dict
@pytest.mark.ci
def test_dml_qte_coef(dml_qte_fixture):
- assert np.allclose(dml_qte_fixture['coef'],
- dml_qte_fixture['coef_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_qte_fixture['coef'],
- dml_qte_fixture['coef_ext_smpls'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_qte_fixture["coef"], dml_qte_fixture["coef_manual"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_qte_fixture["coef"], dml_qte_fixture["coef_ext_smpls"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_qte_se(dml_qte_fixture):
- assert np.allclose(dml_qte_fixture['se'],
- dml_qte_fixture['se_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_qte_fixture['se'],
- dml_qte_fixture['se_ext_smpls'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_qte_fixture["se"], dml_qte_fixture["se_manual"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_qte_fixture["se"], dml_qte_fixture["se_ext_smpls"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_qte_boot(dml_qte_fixture):
- for bootstrap in dml_qte_fixture['boot_methods']:
- assert np.allclose(dml_qte_fixture['boot_t_stat_' + bootstrap],
- dml_qte_fixture['boot_t_stat_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_qte_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_qte_fixture["boot_t_stat_" + bootstrap],
+ dml_qte_fixture["boot_t_stat_" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_qte_ci(dml_qte_fixture):
- assert np.allclose(dml_qte_fixture['ci'],
- dml_qte_fixture['ci_manual'],
- rtol=1e-9, atol=1e-4)
- for bootstrap in dml_qte_fixture['boot_methods']:
- assert np.allclose(dml_qte_fixture['boot_ci_' + bootstrap],
- dml_qte_fixture['boot_ci_' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_qte_fixture["ci"], dml_qte_fixture["ci_manual"], rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_qte_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_qte_fixture["boot_ci_" + bootstrap], dml_qte_fixture["boot_ci_" + bootstrap + "_manual"], rtol=1e-9, atol=1e-4
+ )
@pytest.mark.ci
def test_doubleml_qte_exceptions():
np.random.seed(42)
- (x, y, d) = make_irm_data(1000, 5, 2, return_type='array')
+ (x, y, d) = make_irm_data(1000, 5, 2, return_type="array")
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
ml_g = RandomForestClassifier(n_estimators=20)
ml_m = RandomForestClassifier(n_estimators=20)
- msg = ('Sample splitting not specified. '
- r'Either draw samples via .draw_sample_splitting\(\) or set external samples via .set_sample_splitting\(\).')
+ msg = (
+ "Sample splitting not specified. "
+ r"Either draw samples via .draw_sample_splitting\(\) or set external samples via .set_sample_splitting\(\)."
+ )
with pytest.raises(ValueError, match=msg):
dml_obj = dml.DoubleMLQTE(obj_dml_data, ml_g, ml_m, draw_sample_splitting=False)
_ = dml_obj.smpls
@@ -183,8 +178,8 @@ def test_doubleml_qte_exceptions():
@pytest.mark.ci
def test_doubleml_qte_return_types(dml_qte_fixture):
- assert isinstance(dml_qte_fixture['qte_model'].__str__(), str)
- assert isinstance(dml_qte_fixture['qte_model'].summary, pd.DataFrame)
+ assert isinstance(dml_qte_fixture["qte_model"].__str__(), str)
+ assert isinstance(dml_qte_fixture["qte_model"].summary, pd.DataFrame)
- assert dml_qte_fixture['qte_model'].all_coef.shape == (n_quantiles, n_rep)
- assert isinstance(dml_qte_fixture['unfitted_qte_model'].summary, pd.DataFrame)
+ assert dml_qte_fixture["qte_model"].all_coef.shape == (n_quantiles, n_rep)
+ assert isinstance(dml_qte_fixture["unfitted_qte_model"].summary, pd.DataFrame)
diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py
index 0aca62a80..32193c30a 100644
--- a/doubleml/irm/tests/test_qte_exceptions.py
+++ b/doubleml/irm/tests/test_qte_exceptions.py
@@ -1,14 +1,13 @@
-import pytest
import numpy as np
import pandas as pd
+import pytest
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import Lasso, LogisticRegression
-from doubleml import DoubleMLQTE, DoubleMLData
+from doubleml import DoubleMLData, DoubleMLQTE
from doubleml.datasets import make_irm_data
from doubleml.double_ml_data import DoubleMLBaseData
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.ensemble import RandomForestClassifier
-
np.random.seed(42)
n = 100
dml_data_irm = make_irm_data(n_obs=n)
@@ -17,8 +16,7 @@
class DummyDataClass(DoubleMLBaseData):
- def __init__(self,
- data):
+ def __init__(self, data):
DoubleMLBaseData.__init__(self, data)
@property
@@ -28,56 +26,58 @@ def n_coefs(self):
@pytest.mark.ci
def test_exception_data():
- msg = 'The data must be of DoubleMLData type.'
+ msg = "The data must be of DoubleMLData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLQTE(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_m)
- msg = ('Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for QTE
- _ = DoubleMLQTE(DoubleMLData(df_irm, 'y', 'd'),
- LogisticRegression(), LogisticRegression())
+ _ = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), LogisticRegression(), LogisticRegression())
df_irm = dml_data_irm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for QTE
- _ = DoubleMLQTE(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- LogisticRegression(), LogisticRegression())
+ _ = DoubleMLQTE(DoubleMLData(df_irm, "y", ["d", "X1"]), LogisticRegression(), LogisticRegression())
@pytest.mark.ci
def test_exception_score():
# QTE
- msg = 'Invalid score IV. Valid score PQ or LPQ or CVaR.'
+ msg = "Invalid score IV. Valid score PQ or LPQ or CVaR."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score=2)
@pytest.mark.ci
def test_exception_trimming_rule():
- msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+ msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard")
msg = "trimming_threshold has to be a float. Object of type passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
+ _ = DoubleMLQTE(
+ dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1"
+ )
- msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
+ msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
+ _ = DoubleMLQTE(
+ dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6
+ )
@pytest.mark.ci
def test_exception_quantiles():
- msg = r'Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed.'
+ msg = r"Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLQTE(dml_data_irm, ml_g, ml_m, quantiles=[0.2, 2])
@@ -92,18 +92,18 @@ def test_exception_ipw_normalization():
@pytest.mark.ci
def test_exception_bootstrap():
dml_qte_boot = DoubleMLQTE(dml_data_irm, RandomForestClassifier(), RandomForestClassifier())
- msg = r'Apply fit\(\) before bootstrap\(\).'
+ msg = r"Apply fit\(\) before bootstrap\(\)."
with pytest.raises(ValueError, match=msg):
dml_qte_boot.bootstrap()
dml_qte_boot.fit()
msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.'
with pytest.raises(ValueError, match=msg):
- dml_qte_boot.bootstrap(method='Gaussian')
+ dml_qte_boot.bootstrap(method="Gaussian")
msg = "The number of bootstrap replications must be of int type. 500 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_qte_boot.bootstrap(n_rep_boot='500')
- msg = 'The number of bootstrap replications must be positive. 0 was passed.'
+ dml_qte_boot.bootstrap(n_rep_boot="500")
+ msg = "The number of bootstrap replications must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
dml_qte_boot.bootstrap(n_rep_boot=0)
@@ -113,21 +113,21 @@ def test_doubleml_exception_confint():
dml_qte_confint = DoubleMLQTE(dml_data_irm, RandomForestClassifier(), RandomForestClassifier())
dml_qte_confint.fit()
- msg = 'joint must be True or False. Got 1.'
+ msg = "joint must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_qte_confint.confint(joint=1)
msg = "The confidence level must be of float type. 5% of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_qte_confint.confint(level='5%')
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ dml_qte_confint.confint(level="5%")
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
- dml_qte_confint.confint(level=0.)
+ dml_qte_confint.confint(level=0.0)
dml_qte_confint_not_fitted = DoubleMLQTE(dml_data_irm, RandomForestClassifier(), RandomForestClassifier())
- msg = r'Apply fit\(\) before confint\(\).'
+ msg = r"Apply fit\(\) before confint\(\)."
with pytest.raises(ValueError, match=msg):
dml_qte_confint_not_fitted.confint()
- msg = r'Apply bootstrap\(\) before confint\(joint=True\).'
+ msg = r"Apply bootstrap\(\) before confint\(joint=True\)."
with pytest.raises(ValueError, match=msg):
dml_qte_confint.confint(joint=True)
dml_qte_confint.bootstrap()
@@ -139,15 +139,15 @@ def test_doubleml_exception_confint():
def test_doubleml_exception_p_adjust():
dml_qte_p_adjust = DoubleMLQTE(dml_data_irm, RandomForestClassifier(), RandomForestClassifier())
- msg = r'Apply fit\(\) before p_adjust\(\).'
+ msg = r"Apply fit\(\) before p_adjust\(\)."
with pytest.raises(ValueError, match=msg):
dml_qte_p_adjust.p_adjust()
dml_qte_p_adjust.fit()
msg = r'Apply bootstrap\(\) before p_adjust\("romano-wolf"\).'
with pytest.raises(ValueError, match=msg):
- dml_qte_p_adjust.p_adjust(method='romano-wolf')
+ dml_qte_p_adjust.p_adjust(method="romano-wolf")
dml_qte_p_adjust.bootstrap()
- p_val = dml_qte_p_adjust.p_adjust(method='romano-wolf')
+ p_val = dml_qte_p_adjust.p_adjust(method="romano-wolf")
assert isinstance(p_val, pd.DataFrame)
msg = "The p_adjust method must be of str type. 0.05 of type was passed."
diff --git a/doubleml/irm/tests/test_ssm.py b/doubleml/irm/tests/test_ssm.py
index 1419b450f..b157794b8 100644
--- a/doubleml/irm/tests/test_ssm.py
+++ b/doubleml/irm/tests/test_ssm.py
@@ -1,9 +1,8 @@
-import pytest
import math
-import numpy as np
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.linear_model import LassoCV, LogisticRegressionCV
import doubleml as dml
@@ -12,40 +11,35 @@
from ._utils_ssm_manual import fit_selection
-@pytest.fixture(scope='module',
- params=[[LassoCV(),
- LogisticRegressionCV(penalty='l1', solver='liblinear')]])
+@pytest.fixture(scope="module", params=[[LassoCV(), LogisticRegressionCV(penalty="l1", solver="liblinear")]])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['missing-at-random', 'nonignorable'])
+@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01])
+@pytest.fixture(scope="module", params=[0.01])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
-def dml_selection_fixture(generate_data_selection_mar, generate_data_selection_nonignorable,
- learner, score,
- trimming_threshold, normalize_ipw):
+@pytest.fixture(scope="module")
+def dml_selection_fixture(
+ generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw
+):
n_folds = 3
# collect data
np.random.seed(42)
- if score == 'missing-at-random':
+ if score == "missing-at-random":
(x, y, d, z, s) = generate_data_selection_mar
else:
(x, y, d, z, s) = generate_data_selection_nonignorable
@@ -59,36 +53,41 @@ def dml_selection_fixture(generate_data_selection_mar, generate_data_selection_n
all_smpls = draw_smpls(n_obs, n_folds)
np.random.seed(42)
- if score == 'missing-at-random':
+ if score == "missing-at-random":
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, s=s)
- dml_sel_obj = dml.DoubleMLSSM(obj_dml_data,
- ml_g, ml_pi, ml_m,
- n_folds=n_folds,
- score=score)
+ dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'nonignorable'
+ assert score == "nonignorable"
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, s=s)
- dml_sel_obj = dml.DoubleMLSSM(obj_dml_data,
- ml_g, ml_pi, ml_m,
- n_folds=n_folds,
- score=score)
+ dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score)
np.random.seed(42)
dml_sel_obj.set_sample_splitting(all_smpls=all_smpls)
dml_sel_obj.fit()
np.random.seed(42)
- res_manual = fit_selection(y, x, d, z, s,
- clone(learner[0]), clone(learner[1]), clone(learner[1]),
- all_smpls, score,
- trimming_rule='truncate',
- trimming_threshold=trimming_threshold,
- normalize_ipw=normalize_ipw)
-
- res_dict = {'coef': dml_sel_obj.coef[0],
- 'coef_manual': res_manual['theta'],
- 'se': dml_sel_obj.se[0],
- 'se_manual': res_manual['se']}
+ res_manual = fit_selection(
+ y,
+ x,
+ d,
+ z,
+ s,
+ clone(learner[0]),
+ clone(learner[1]),
+ clone(learner[1]),
+ all_smpls,
+ score,
+ trimming_rule="truncate",
+ trimming_threshold=trimming_threshold,
+ normalize_ipw=normalize_ipw,
+ )
+
+ res_dict = {
+ "coef": dml_sel_obj.coef[0],
+ "coef_manual": res_manual["theta"],
+ "se": dml_sel_obj.se[0],
+ "se_manual": res_manual["se"],
+ }
# sensitivity tests
# TODO
@@ -98,13 +97,9 @@ def dml_selection_fixture(generate_data_selection_mar, generate_data_selection_n
@pytest.mark.ci
def test_dml_selection_coef(dml_selection_fixture):
- assert math.isclose(dml_selection_fixture['coef'],
- dml_selection_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-2)
+ assert math.isclose(dml_selection_fixture["coef"], dml_selection_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-2)
@pytest.mark.ci
def test_dml_selection_se(dml_selection_fixture):
- assert math.isclose(dml_selection_fixture['se'],
- dml_selection_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=5e-2)
+ assert math.isclose(dml_selection_fixture["se"], dml_selection_fixture["se_manual"], rel_tol=1e-9, abs_tol=5e-2)
diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py
index aed1d6bf4..d2de330ba 100644
--- a/doubleml/irm/tests/test_ssm_exceptions.py
+++ b/doubleml/irm/tests/test_ssm_exceptions.py
@@ -1,14 +1,13 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.linear_model import Lasso, LogisticRegression
from doubleml import DoubleMLSSM
from doubleml.datasets import make_ssm_data
from doubleml.double_ml_data import DoubleMLBaseData
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.base import BaseEstimator
-
np.random.seed(3141)
n = 100
dml_data_mar = make_ssm_data(n_obs=n, mar=True)
@@ -17,12 +16,11 @@
ml_pi = LogisticRegression()
ml_m = LogisticRegression()
dml_ssm_mar = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m)
-dml_ssm_nonignorable = DoubleMLSSM(dml_data_nonignorable, ml_g, ml_pi, ml_m, score='nonignorable')
+dml_ssm_nonignorable = DoubleMLSSM(dml_data_nonignorable, ml_g, ml_pi, ml_m, score="nonignorable")
class DummyDataClass(DoubleMLBaseData):
- def __init__(self,
- data):
+ def __init__(self, data):
DoubleMLBaseData.__init__(self, data)
@property
@@ -32,49 +30,47 @@ def n_coefs(self):
@pytest.mark.ci
def test_ssm_exception_data():
- msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.'
+ msg = "The data must be of DoubleMLData or DoubleMLClusterData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(pd.DataFrame(), ml_g, ml_pi, ml_m)
- msg = 'The data must be of DoubleMLData type.'
+ msg = "The data must be of DoubleMLData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_pi, ml_m)
# Nonignorable nonresponse without instrument
- msg = ('Sample selection by nonignorable nonresponse was set but instrumental variable \
+ msg = "Sample selection by nonignorable nonresponse was set but instrumental variable \
is None. To estimate treatment effect under nonignorable nonresponse, \
- specify an instrument for the selection variable.')
+ specify an instrument for the selection variable."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, Lasso(), LogisticRegression(), LogisticRegression(), score='nonignorable')
+ _ = DoubleMLSSM(dml_data_mar, Lasso(), LogisticRegression(), LogisticRegression(), score="nonignorable")
@pytest.mark.ci
def test_ssm_exception_scores():
# MAR
- msg = 'Invalid score MAR. Valid score missing-at-random or nonignorable.'
+ msg = "Invalid score MAR. Valid score missing-at-random or nonignorable."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score='MAR')
- msg = 'score should be either a string or a callable. 0 was passed.'
+ _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score="MAR")
+ msg = "score should be either a string or a callable. 0 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score=0)
@pytest.mark.ci
def test_ssm_exception_trimming_rule():
- msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+ msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule='discard')
+ _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="discard")
# check the trimming_threshold exceptions
msg = "trimming_threshold has to be a float. Object of type passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m,
- trimming_rule='truncate', trimming_threshold="0.1")
+ _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold="0.1")
- msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
+ msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m,
- trimming_rule='truncate', trimming_threshold=0.6)
+ _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold=0.6)
@pytest.mark.ci
@@ -89,32 +85,33 @@ def test_ssm_exception_resampling():
msg = "The number of folds must be of int type. 1.5 of type was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, n_folds=1.5)
- msg = ('The number of repetitions for the sample splitting must be of int type. '
- "1.5 of type <class 'float'> was passed.")
+ msg = "The number of repetitions for the sample splitting must be of int type. 1.5 of type <class 'float'> was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, n_rep=1.5)
- msg = 'The number of folds must be positive. 0 was passed.'
+ msg = "The number of folds must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, n_folds=0)
- msg = 'The number of repetitions for the sample splitting must be positive. 0 was passed.'
+ msg = "The number of repetitions for the sample splitting must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, n_rep=0)
- msg = 'draw_sample_splitting must be True or False. Got true.'
+ msg = "draw_sample_splitting must be True or False. Got true."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, draw_sample_splitting='true')
+ _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, draw_sample_splitting="true")
@pytest.mark.ci
def test_ssm_exception_get_params():
- msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_g_d0 or ml_g_d1 or ml_pi or ml_m.'
+ msg = "Invalid nuisance learner ml_r. Valid nuisance learner ml_g_d0 or ml_g_d1 or ml_pi or ml_m."
with pytest.raises(ValueError, match=msg):
- dml_ssm_mar.get_params('ml_r')
+ dml_ssm_mar.get_params("ml_r")
@pytest.mark.ci
def test_ssm_exception_smpls():
- msg = ('Sample splitting not specified. '
- r'Either draw samples via .draw_sample_splitting\(\) or set external samples via .set_sample_splitting\(\).')
+ msg = (
+ "Sample splitting not specified. "
+ r"Either draw samples via .draw_sample_splitting\(\) or set external samples via .set_sample_splitting\(\)."
+ )
dml_plr_no_smpls = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, draw_sample_splitting=False)
with pytest.raises(ValueError, match=msg):
_ = dml_plr_no_smpls.smpls
@@ -124,11 +121,11 @@ def test_ssm_exception_smpls():
def test_ssm_exception_fit():
msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_ssm_mar.fit(n_jobs_cv='5')
- msg = 'store_predictions must be True or False. Got 1.'
+ dml_ssm_mar.fit(n_jobs_cv="5")
+ msg = "store_predictions must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_ssm_mar.fit(store_predictions=1)
- msg = 'store_models must be True or False. Got 1.'
+ msg = "store_models must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_ssm_mar.fit(store_models=1)
@@ -136,18 +133,18 @@ def test_ssm_exception_fit():
@pytest.mark.ci
def test_ssm_exception_bootstrap():
dml_ssm_boot = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m)
- msg = r'Apply fit\(\) before bootstrap\(\).'
+ msg = r"Apply fit\(\) before bootstrap\(\)."
with pytest.raises(ValueError, match=msg):
dml_ssm_boot.bootstrap()
dml_ssm_boot.fit()
msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.'
with pytest.raises(ValueError, match=msg):
- dml_ssm_boot.bootstrap(method='Gaussian')
+ dml_ssm_boot.bootstrap(method="Gaussian")
msg = "The number of bootstrap replications must be of int type. 500 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_ssm_boot.bootstrap(n_rep_boot='500')
- msg = 'The number of bootstrap replications must be positive. 0 was passed.'
+ dml_ssm_boot.bootstrap(n_rep_boot="500")
+ msg = "The number of bootstrap replications must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
dml_ssm_boot.bootstrap(n_rep_boot=0)
@@ -155,22 +152,22 @@ def test_ssm_exception_bootstrap():
@pytest.mark.ci
def test_ssm_exception_confint():
dml_ssm_confint = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m)
- msg = r'Apply fit\(\) before confint\(\).'
+ msg = r"Apply fit\(\) before confint\(\)."
with pytest.raises(ValueError, match=msg):
dml_ssm_confint.confint()
dml_ssm_confint.fit()
- msg = 'joint must be True or False. Got 1.'
+ msg = "joint must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_ssm_confint.confint(joint=1)
msg = "The confidence level must be of float type. 5% of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_ssm_confint.confint(level='5%')
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ dml_ssm_confint.confint(level="5%")
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
- dml_ssm_confint.confint(level=0.)
+ dml_ssm_confint.confint(level=0.0)
- msg = r'Apply bootstrap\(\) before confint\(joint=True\).'
+ msg = r"Apply bootstrap\(\) before confint\(joint=True\)."
with pytest.raises(ValueError, match=msg):
dml_ssm_confint.confint(joint=True)
dml_ssm_confint.bootstrap()
@@ -180,12 +177,12 @@ def test_ssm_exception_confint():
@pytest.mark.ci
def test_ssm_exception_set_ml_nuisance_params():
- msg = 'Invalid nuisance learner g. Valid nuisance learner ml_g_d0 or ml_g_d1 or ml_pi or ml_m.'
+ msg = "Invalid nuisance learner g. Valid nuisance learner ml_g_d0 or ml_g_d1 or ml_pi or ml_m."
with pytest.raises(ValueError, match=msg):
- dml_ssm_mar.set_ml_nuisance_params('g', 'd', {'alpha': 0.1})
- msg = 'Invalid treatment variable y. Valid treatment variable d.'
+ dml_ssm_mar.set_ml_nuisance_params("g", "d", {"alpha": 0.1})
+ msg = "Invalid treatment variable y. Valid treatment variable d."
with pytest.raises(ValueError, match=msg):
- dml_ssm_mar.set_ml_nuisance_params('ml_g_d0', 'y', {'alpha': 0.1})
+ dml_ssm_mar.set_ml_nuisance_params("ml_g_d0", "y", {"alpha": 0.1})
class _DummyNoSetParams:
@@ -211,24 +208,26 @@ def predict_proba(self):
r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning",
)
def test_ssm_exception_learner():
- err_msg_prefix = 'Invalid learner provided for ml_g: '
+ err_msg_prefix = "Invalid learner provided for ml_g: "
- msg = err_msg_prefix + 'provide an instance of a learner instead of a class.'
+ msg = err_msg_prefix + "provide an instance of a learner instead of a class."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, Lasso, ml_pi, ml_m)
- msg = err_msg_prefix + r'BaseEstimator\(\) has no method .fit\(\).'
+ msg = err_msg_prefix + r"BaseEstimator\(\) has no method .fit\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, BaseEstimator(), ml_pi, ml_m)
- msg = r'has no method .set_params\(\).'
+ msg = r"has no method .set_params\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, _DummyNoSetParams(), ml_pi, ml_m)
- msg = r'has no method .get_params\(\).'
+ msg = r"has no method .get_params\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, _DummyNoGetParams(), ml_pi, ml_m)
# allow classifiers for ml_g, but only for binary outcome
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier '
- 'but the outcome is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier "
+ "but the outcome is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLSSM(dml_data_mar, LogisticRegression(), ml_pi, ml_m)
@@ -236,7 +235,7 @@ def test_ssm_exception_learner():
# it then predicts labels and therefore an exception will be thrown
log_reg = LogisticRegression()
log_reg._estimator_type = None
- msg = (r'Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) no classifier.')
+ msg = r"Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) no classifier."
with pytest.warns(UserWarning, match=msg):
_ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, log_reg)
@@ -245,16 +244,16 @@ def test_ssm_exception_learner():
@pytest.mark.filterwarnings(
r"ignore:.*is \(probably\) neither a regressor nor a classifier.*:UserWarning",
r"ignore: Learner provided for ml_m is probably invalid.*is \(probably\) no classifier.*:UserWarning",
- r"ignore: Learner provided for l_pi is probably invalid.*is \(probably\) no classifier.*:UserWarning"
+ r"ignore: Learner provided for ml_pi is probably invalid.*is \(probably\) no classifier.*:UserWarning",
)
def test_ssm_exception_and_warning_learner():
# msg = err_msg_prefix + r'_DummyNoClassifier\(\) has no method .predict\(\).'
with pytest.raises(TypeError):
_ = DoubleMLSSM(dml_data_mar, _DummyNoClassifier(), ml_pi, ml_m)
- msg = 'Invalid learner provided for ml_pi: ' + r'Lasso\(\) has no method .predict_proba\(\).'
+ msg = "Invalid learner provided for ml_pi: " + r"Lasso\(\) has no method .predict_proba\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, Lasso(), Lasso(), ml_m)
- msg = 'Invalid learner provided for ml_m: ' + r'Lasso\(\) has no method .predict_proba\(\).'
+ msg = "Invalid learner provided for ml_m: " + r"Lasso\(\) has no method .predict_proba\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLSSM(dml_data_mar, Lasso(), ml_pi, Lasso())
@@ -277,25 +276,27 @@ def predict(self, X):
@pytest.mark.ci
def test_ssm_nan_prediction():
- msg = r'Predictions from learner LassoWithNanPred\(\) for ml_g_d1 are not finite.'
+ msg = r"Predictions from learner LassoWithNanPred\(\) for ml_g_d1 are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLSSM(dml_data_mar, LassoWithNanPred(), ml_pi, ml_m).fit()
- msg = r'Predictions from learner LassoWithInfPred\(\) for ml_g_d1 are not finite.'
+ msg = r"Predictions from learner LassoWithInfPred\(\) for ml_g_d1 are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLSSM(dml_data_mar, LassoWithInfPred(), ml_pi, ml_m).fit()
@pytest.mark.ci
def test_double_ml_exception_evaluate_learner():
- dml_ssm_obj = DoubleMLSSM(dml_data_mar,
- ml_g=Lasso(),
- ml_pi=LogisticRegression(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='missing-at-random')
-
- msg = r'Apply fit\(\) before evaluate_learners\(\).'
+ dml_ssm_obj = DoubleMLSSM(
+ dml_data_mar,
+ ml_g=Lasso(),
+ ml_pi=LogisticRegression(),
+ ml_m=LogisticRegression(),
+ trimming_threshold=0.05,
+ n_folds=5,
+ score="missing-at-random",
+ )
+
+ msg = r"Apply fit\(\) before evaluate_learners\(\)."
with pytest.raises(ValueError, match=msg):
dml_ssm_obj.evaluate_learners()
@@ -305,14 +306,17 @@ def test_double_ml_exception_evaluate_learner():
with pytest.raises(TypeError, match=msg):
dml_ssm_obj.evaluate_learners(metric="mse")
- msg = (r"The learners have to be a subset of \['ml_g_d0', 'ml_g_d1', 'ml_pi', 'ml_m'\]. "
- r"Learners \['ml_mu', 'ml_p'\] provided.")
+ msg = (
+ r"The learners have to be a subset of \['ml_g_d0', 'ml_g_d1', 'ml_pi', 'ml_m'\]. "
+ r"Learners \['ml_mu', 'ml_p'\] provided."
+ )
with pytest.raises(ValueError, match=msg):
- dml_ssm_obj.evaluate_learners(learners=['ml_mu', 'ml_p'])
+ dml_ssm_obj.evaluate_learners(learners=["ml_mu", "ml_p"])
- msg = 'Evaluation from learner ml_g_d0 is not finite.'
+ msg = "Evaluation from learner ml_g_d0 is not finite."
def eval_fct(y_pred, y_true):
return np.nan
+
with pytest.raises(ValueError, match=msg):
dml_ssm_obj.evaluate_learners(metric=eval_fct)
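The final hunk shows the contract for evaluate_learners: the metric must be a callable taking (y_pred, y_true), plain strings are rejected, and a non-finite return value raises. A hedged sketch of a valid custom metric (numpy arrays as inputs is an assumption):

import numpy as np


def rmse(y_pred, y_true):
    # Same (y_pred, y_true) signature as eval_fct above, but returns a
    # finite float, so it would pass the finiteness check.
    return float(np.sqrt(np.mean((y_pred - y_true) ** 2)))


assert rmse(np.array([1.0, 2.0]), np.array([1.0, 2.5])) > 0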
diff --git a/doubleml/irm/tests/test_ssm_tune.py b/doubleml/irm/tests/test_ssm_tune.py
index 1f9e93558..0fafbc134 100644
--- a/doubleml/irm/tests/test_ssm_tune.py
+++ b/doubleml/irm/tests/test_ssm_tune.py
@@ -1,11 +1,10 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
import doubleml as dml
@@ -13,58 +12,57 @@
from ._utils_ssm_manual import fit_selection, tune_nuisance_ssm
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(random_state=42)])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression(random_state=42)])
+@pytest.fixture(scope="module", params=[LogisticRegression(random_state=42)])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=['missing-at-random', 'nonignorable'])
+@pytest.fixture(scope="module", params=["missing-at-random", "nonignorable"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def normalize_ipw(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ in [RandomForestRegressor]:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ in [LogisticRegression]
- par_grid = {'C': np.logspace(-2, 2, 10)}
+ par_grid = {"C": np.logspace(-2, 2, 10)}
return par_grid
-@pytest.fixture(scope='module')
-def dml_ssm_fixture(generate_data_selection_mar, generate_data_selection_nonignorable,
- learner_g, learner_m, score,
- normalize_ipw, tune_on_folds):
- par_grid = {'ml_g': get_par_grid(learner_g),
- 'ml_pi': get_par_grid(learner_m),
- 'ml_m': get_par_grid(learner_m)}
+@pytest.fixture(scope="module")
+def dml_ssm_fixture(
+ generate_data_selection_mar,
+ generate_data_selection_nonignorable,
+ learner_g,
+ learner_m,
+ score,
+ normalize_ipw,
+ tune_on_folds,
+):
+ par_grid = {"ml_g": get_par_grid(learner_g), "ml_pi": get_par_grid(learner_m), "ml_m": get_par_grid(learner_m)}
n_folds_tune = 4
n_folds = 2
# collect data
np.random.seed(42)
- if score == 'missing-at-random':
+ if score == "missing-at-random":
(x, y, d, z, s) = generate_data_selection_mar
else:
(x, y, d, z, s) = generate_data_selection_nonignorable
@@ -77,23 +75,31 @@ def dml_ssm_fixture(generate_data_selection_mar, generate_data_selection_nonigno
ml_m = clone(learner_m)
np.random.seed(42)
- if score == 'missing-at-random':
+ if score == "missing-at-random":
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=None, s=s)
- dml_sel_obj = dml.DoubleMLSSM(obj_dml_data,
- ml_g, ml_pi, ml_m,
- n_folds=n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ dml_sel_obj = dml.DoubleMLSSM(
+ obj_dml_data,
+ ml_g,
+ ml_pi,
+ ml_m,
+ n_folds=n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
else:
- assert score == 'nonignorable'
+ assert score == "nonignorable"
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, z=z, s=s)
- dml_sel_obj = dml.DoubleMLSSM(obj_dml_data,
- ml_g, ml_pi, ml_m,
- n_folds=n_folds,
- score=score,
- normalize_ipw=normalize_ipw,
- draw_sample_splitting=False)
+ dml_sel_obj = dml.DoubleMLSSM(
+ obj_dml_data,
+ ml_g,
+ ml_pi,
+ ml_m,
+ n_folds=n_folds,
+ score=score,
+ normalize_ipw=normalize_ipw,
+ draw_sample_splitting=False,
+ )
# synchronize the sample splitting
np.random.seed(42)
@@ -101,8 +107,7 @@ def dml_ssm_fixture(generate_data_selection_mar, generate_data_selection_nonigno
np.random.seed(42)
# tune hyperparameters
- tune_res = dml_sel_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_sel_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLSSM)
dml_sel_obj.fit()
@@ -111,18 +116,40 @@ def dml_ssm_fixture(generate_data_selection_mar, generate_data_selection_nonigno
smpls = all_smpls[0]
if tune_on_folds:
g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm(
- y, x, d, z, s,
- clone(learner_g), clone(learner_m), clone(learner_m),
- smpls, score, n_folds_tune,
- par_grid['ml_g'], par_grid['ml_pi'], par_grid['ml_m'])
+ y,
+ x,
+ d,
+ z,
+ s,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_m),
+ smpls,
+ score,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_pi"],
+ par_grid["ml_m"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
g0_best_params, g1_best_params, pi_best_params, m_best_params = tune_nuisance_ssm(
- y, x, d, z, s,
- clone(learner_g), clone(learner_m), clone(learner_m),
- xx, score, n_folds_tune,
- par_grid['ml_g'], par_grid['ml_pi'], par_grid['ml_m'])
+ y,
+ x,
+ d,
+ z,
+ s,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_m),
+ xx,
+ score,
+ n_folds_tune,
+ par_grid["ml_g"],
+ par_grid["ml_pi"],
+ par_grid["ml_m"],
+ )
g0_best_params = g0_best_params * n_folds
g1_best_params = g1_best_params * n_folds
@@ -130,30 +157,39 @@ def dml_ssm_fixture(generate_data_selection_mar, generate_data_selection_nonigno
m_best_params = m_best_params * n_folds
np.random.seed(42)
- res_manual = fit_selection(y, x, d, z, s,
- clone(learner_g), clone(learner_m), clone(learner_m),
- all_smpls, score,
- normalize_ipw=normalize_ipw,
- g_d0_params=g0_best_params, g_d1_params=g1_best_params,
- pi_params=pi_best_params, m_params=m_best_params)
-
- res_dict = {'coef': dml_sel_obj.coef[0],
- 'coef_manual': res_manual['theta'],
- 'se': dml_sel_obj.se[0],
- 'se_manual': res_manual['se']}
+ res_manual = fit_selection(
+ y,
+ x,
+ d,
+ z,
+ s,
+ clone(learner_g),
+ clone(learner_m),
+ clone(learner_m),
+ all_smpls,
+ score,
+ normalize_ipw=normalize_ipw,
+ g_d0_params=g0_best_params,
+ g_d1_params=g1_best_params,
+ pi_params=pi_best_params,
+ m_params=m_best_params,
+ )
+
+ res_dict = {
+ "coef": dml_sel_obj.coef[0],
+ "coef_manual": res_manual["theta"],
+ "se": dml_sel_obj.se[0],
+ "se_manual": res_manual["se"],
+ }
return res_dict
@pytest.mark.ci
def test_dml_ssm_coef(dml_ssm_fixture):
- assert math.isclose(dml_ssm_fixture['coef'],
- dml_ssm_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_ssm_fixture["coef"], dml_ssm_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_ssm_se(dml_ssm_fixture):
- assert math.isclose(dml_ssm_fixture['se'],
- dml_ssm_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_ssm_fixture["se"], dml_ssm_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
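For orientation, a minimal sketch of the tuning workflow the reformatted test exercises. The `DoubleMLSSM` constructor and the `tune` call mirror the test above; the dataset helper `make_ssm_data` and its arguments are assumptions of this sketch, not part of the patch.

```python
# Hedged usage sketch: tune and fit a sample-selection model (SSM).
# Assumption: doubleml.datasets.make_ssm_data generates a missing-at-random design.
import numpy as np
import doubleml as dml
from doubleml.datasets import make_ssm_data
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression

np.random.seed(42)
obj_dml_data = make_ssm_data(n_obs=500, dim_x=20, mar=True)
dml_ssm = dml.DoubleMLSSM(
    obj_dml_data,
    ml_g=RandomForestRegressor(random_state=42),  # outcome regression
    ml_pi=LogisticRegression(random_state=42),    # selection propensity
    ml_m=LogisticRegression(random_state=42),     # treatment propensity
    n_folds=2,
    score="missing-at-random",
)
par_grid = {
    "ml_g": {"n_estimators": [5, 10, 20]},
    "ml_pi": {"C": np.logspace(-2, 2, 10)},
    "ml_m": {"C": np.logspace(-2, 2, 10)},
}
dml_ssm.tune(par_grid, tune_on_folds=True, n_folds_tune=4)  # returns the tuned object
dml_ssm.fit()
print(dml_ssm.summary)
```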
diff --git a/doubleml/plm/__init__.py b/doubleml/plm/__init__.py
index 7cfe61f0d..e81f00c52 100644
--- a/doubleml/plm/__init__.py
+++ b/doubleml/plm/__init__.py
@@ -2,8 +2,8 @@
The :mod:`doubleml.plm` module implements double machine learning estimates based on partially linear models.
"""
-from .plr import DoubleMLPLR
from .pliv import DoubleMLPLIV
+from .plr import DoubleMLPLR
__all__ = [
"DoubleMLPLR",
diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py
index 100d4f69f..dc0fbd293 100644
--- a/doubleml/plm/pliv.py
+++ b/doubleml/plm/pliv.py
@@ -1,18 +1,16 @@
+import warnings
+
import numpy as np
-from sklearn.utils import check_X_y
-from sklearn.model_selection import KFold
-from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
-from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor
-
-import warnings
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV
+from sklearn.utils import check_X_y
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-
-from ..utils._estimation import _dml_cv_predict, _dml_tune
from ..utils._checks import _check_finite_predictions
+from ..utils._estimation import _dml_cv_predict, _dml_tune
class DoubleMLPLIV(LinearScoreMixin, DoubleML):
@@ -95,169 +93,127 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML):
:math:`V` are stochastic errors.
"""
- def __init__(self,
- obj_dml_data,
- ml_l,
- ml_m,
- ml_r,
- ml_g=None,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_l,
+ ml_m,
+ ml_r,
+ ml_g=None,
+ n_folds=5,
+ n_rep=1,
+ score="partialling out",
+ draw_sample_splitting=True,
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
self.partialX = True
self.partialZ = False
self._check_score(self.score)
- _ = self._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
- _ = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False)
- _ = self._check_learner(ml_r, 'ml_r', regressor=True, classifier=False)
- self._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r}
+ _ = self._check_learner(ml_l, "ml_l", regressor=True, classifier=False)
+ _ = self._check_learner(ml_m, "ml_m", regressor=True, classifier=False)
+ _ = self._check_learner(ml_r, "ml_r", regressor=True, classifier=False)
+ self._learner = {"ml_l": ml_l, "ml_m": ml_m, "ml_r": ml_r}
if ml_g is not None:
- if (isinstance(self.score, str) & (self.score == 'IV-type')) | callable(self.score):
- _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
- self._learner['ml_g'] = ml_g
+ if (isinstance(self.score, str) & (self.score == "IV-type")) | callable(self.score):
+ _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False)
+ self._learner["ml_g"] = ml_g
else:
- assert (isinstance(self.score, str) & (self.score == 'partialling out'))
- warnings.warn(('A learner ml_g has been provided for score = "partialling out" but will be ignored. "'
- 'A learner ml_g is not required for estimation.'))
- elif isinstance(self.score, str) & (self.score == 'IV-type'):
+ assert isinstance(self.score, str) & (self.score == "partialling out")
+ warnings.warn(
+ (
+ 'A learner ml_g has been provided for score = "partialling out" but will be ignored. '
+ "A learner ml_g is not required for estimation."
+ )
+ )
+ elif isinstance(self.score, str) & (self.score == "IV-type"):
raise ValueError("For score = 'IV-type', learners ml_l, ml_m, ml_r and ml_g need to be specified.")
- self._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'}
- if 'ml_g' in self._learner:
- self._predict_method['ml_g'] = 'predict'
+ self._predict_method = {"ml_l": "predict", "ml_m": "predict", "ml_r": "predict"}
+ if "ml_g" in self._learner:
+ self._predict_method["ml_g"] = "predict"
self._initialize_ml_nuisance_params()
self._external_predictions_implemented = True
@classmethod
- def _partialX(cls,
- obj_dml_data,
- ml_l,
- ml_m,
- ml_r,
- ml_g=None,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
- obj = cls(obj_dml_data,
- ml_l,
- ml_m,
- ml_r,
- ml_g,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def _partialX(
+ cls, obj_dml_data, ml_l, ml_m, ml_r, ml_g=None, n_folds=5, n_rep=1, score="partialling out", draw_sample_splitting=True
+ ):
+ obj = cls(obj_dml_data, ml_l, ml_m, ml_r, ml_g, n_folds, n_rep, score, draw_sample_splitting)
obj._check_data(obj._dml_data)
obj.partialX = True
obj.partialZ = False
obj._check_score(obj.score)
- _ = obj._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
- _ = obj._check_learner(ml_m, 'ml_m', regressor=True, classifier=False)
- _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False)
- obj._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r}
- obj._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'}
+ _ = obj._check_learner(ml_l, "ml_l", regressor=True, classifier=False)
+ _ = obj._check_learner(ml_m, "ml_m", regressor=True, classifier=False)
+ _ = obj._check_learner(ml_r, "ml_r", regressor=True, classifier=False)
+ obj._learner = {"ml_l": ml_l, "ml_m": ml_m, "ml_r": ml_r}
+ obj._predict_method = {"ml_l": "predict", "ml_m": "predict", "ml_r": "predict"}
obj._initialize_ml_nuisance_params()
return obj
@classmethod
- def _partialZ(cls,
- obj_dml_data,
- ml_r,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
+ def _partialZ(cls, obj_dml_data, ml_r, n_folds=5, n_rep=1, score="partialling out", draw_sample_splitting=True):
# to pass the checks for the learners, we temporarily set ml_l and ml_m to DummyRegressor()
- obj = cls(obj_dml_data,
- DummyRegressor(),
- DummyRegressor(),
- ml_r,
- None,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ obj = cls(obj_dml_data, DummyRegressor(), DummyRegressor(), ml_r, None, n_folds, n_rep, score, draw_sample_splitting)
obj._check_data(obj._dml_data)
obj.partialX = False
obj.partialZ = True
obj._check_score(obj.score)
- _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False)
- obj._learner = {'ml_r': ml_r}
- obj._predict_method = {'ml_r': 'predict'}
+ _ = obj._check_learner(ml_r, "ml_r", regressor=True, classifier=False)
+ obj._learner = {"ml_r": ml_r}
+ obj._predict_method = {"ml_r": "predict"}
obj._initialize_ml_nuisance_params()
return obj
@classmethod
- def _partialXZ(cls,
- obj_dml_data,
- ml_l,
- ml_m,
- ml_r,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
- obj = cls(obj_dml_data,
- ml_l,
- ml_m,
- ml_r,
- None,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def _partialXZ(
+ cls, obj_dml_data, ml_l, ml_m, ml_r, n_folds=5, n_rep=1, score="partialling out", draw_sample_splitting=True
+ ):
+ obj = cls(obj_dml_data, ml_l, ml_m, ml_r, None, n_folds, n_rep, score, draw_sample_splitting)
obj._check_data(obj._dml_data)
obj.partialX = True
obj.partialZ = True
obj._check_score(obj.score)
- _ = obj._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
- _ = obj._check_learner(ml_m, 'ml_m', regressor=True, classifier=False)
- _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False)
- obj._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r}
- obj._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'}
+ _ = obj._check_learner(ml_l, "ml_l", regressor=True, classifier=False)
+ _ = obj._check_learner(ml_m, "ml_m", regressor=True, classifier=False)
+ _ = obj._check_learner(ml_r, "ml_r", regressor=True, classifier=False)
+ obj._learner = {"ml_l": ml_l, "ml_m": ml_m, "ml_r": ml_r}
+ obj._predict_method = {"ml_l": "predict", "ml_m": "predict", "ml_r": "predict"}
obj._initialize_ml_nuisance_params()
return obj
def _initialize_ml_nuisance_params(self):
if self.partialX & (not self.partialZ) & (self._dml_data.n_instr > 1):
- param_names = ['ml_l', 'ml_r'] + ['ml_m_' + z_col for z_col in self._dml_data.z_cols]
+ param_names = ["ml_l", "ml_r"] + ["ml_m_" + z_col for z_col in self._dml_data.z_cols]
else:
param_names = self._learner.keys()
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in param_names}
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in param_names}
def _check_score(self, score):
if isinstance(score, str):
if self.partialX & (not self.partialZ) & (self._dml_data.n_instr == 1):
- valid_score = ['partialling out', 'IV-type']
+ valid_score = ["partialling out", "IV-type"]
else:
- valid_score = ['partialling out']
+ valid_score = ["partialling out"]
if score not in valid_score:
- raise ValueError('Invalid score ' + score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + score + ". " + "Valid score " + " or ".join(valid_score) + ".")
else:
if not callable(score):
- raise TypeError('score should be either a string or a callable. '
- '%r was passed.' % score)
+ raise TypeError("score should be either a string or a callable. %r was passed." % score)
return score
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
if obj_dml_data.n_instr == 0:
- raise ValueError('Incompatible data. ' +
- 'At least one variable must be set as instrumental variable. '
- 'To fit a partially linear regression model without instrumental variable(s) '
- 'use DoubleMLPLR instead of DoubleMLPLIV.')
+ raise ValueError(
+ "Incompatible data. " + "At least one variable must be set as instrumental variable. "
+ "To fit a partially linear regression model without instrumental variable(s) "
+ "use DoubleMLPLR instead of DoubleMLPLIV."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
@@ -266,134 +222,158 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
elif (not self.partialX) & self.partialZ:
psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models)
else:
- assert (self.partialX & self.partialZ)
+ assert self.partialX & self.partialZ
psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, return_models)
return psi_elements, preds
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
if self.partialX & (not self.partialZ):
- res = self._nuisance_tuning_partial_x(smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search)
+ res = self._nuisance_tuning_partial_x(
+ smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ )
elif (not self.partialX) & self.partialZ:
- res = self._nuisance_tuning_partial_z(smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search)
+ res = self._nuisance_tuning_partial_z(
+ smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ )
else:
- assert (self.partialX & self.partialZ)
- res = self._nuisance_tuning_partial_xz(smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search)
+ assert self.partialX & self.partialZ
+ res = self._nuisance_tuning_partial_xz(
+ smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ )
return res
def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# nuisance l
- if external_predictions['ml_l'] is not None:
- l_hat = {'preds': external_predictions['ml_l'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_l"] is not None:
+ l_hat = {"preds": external_predictions["ml_l"], "targets": None, "models": None}
else:
- l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'],
- return_models=return_models)
- _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls)
-
- predictions = {'ml_l': l_hat['preds']}
- targets = {'ml_l': l_hat['targets']}
- models = {'ml_l': l_hat['models']}
+ l_hat = _dml_cv_predict(
+ self._learner["ml_l"],
+ x,
+ y,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_l"),
+ method=self._predict_method["ml_l"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(l_hat["preds"], self._learner["ml_l"], "ml_l", smpls)
+
+ predictions = {"ml_l": l_hat["preds"]}
+ targets = {"ml_l": l_hat["targets"]}
+ models = {"ml_l": l_hat["models"]}
# nuisance m
if self._dml_data.n_instr == 1:
# one instrument: just identified
- x, z = check_X_y(x, np.ravel(self._dml_data.z),
- force_all_finite=False)
- if external_predictions['ml_m'] is not None:
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
+ if external_predictions["ml_m"] is not None:
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- predictions['ml_m'] = m_hat['preds']
- targets['ml_m'] = m_hat['targets']
- models['ml_m'] = m_hat['models']
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ z,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ predictions["ml_m"] = m_hat["preds"]
+ targets["ml_m"] = m_hat["targets"]
+ models["ml_m"] = m_hat["models"]
else:
# several instruments: 2SLS
- m_hat = {'preds': np.full((self._dml_data.n_obs, self._dml_data.n_instr), np.nan),
- 'targets': [None] * self._dml_data.n_instr,
- 'models': [None] * self._dml_data.n_instr}
+ m_hat = {
+ "preds": np.full((self._dml_data.n_obs, self._dml_data.n_instr), np.nan),
+ "targets": [None] * self._dml_data.n_instr,
+ "models": [None] * self._dml_data.n_instr,
+ }
for i_instr in range(self._dml_data.n_instr):
z = self._dml_data.z
- x, this_z = check_X_y(x, z[:, i_instr],
- force_all_finite=False)
- if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None:
- m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]]
- predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions[
- 'ml_m_' + self._dml_data.z_cols[i_instr]]
- targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None
- models['ml_m_' + self._dml_data.z_cols[i_instr]] = None
+ x, this_z = check_X_y(x, z[:, i_instr], force_all_finite=False)
+ if external_predictions["ml_m_" + self._dml_data.z_cols[i_instr]] is not None:
+ m_hat["preds"][:, i_instr] = external_predictions["ml_m_" + self._dml_data.z_cols[i_instr]]
+ predictions["ml_m_" + self._dml_data.z_cols[i_instr]] = external_predictions[
+ "ml_m_" + self._dml_data.z_cols[i_instr]
+ ]
+ targets["ml_m_" + self._dml_data.z_cols[i_instr]] = None
+ models["ml_m_" + self._dml_data.z_cols[i_instr]] = None
else:
- res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]),
- method=self._predict_method['ml_m'], return_models=return_models)
+ res_cv_predict = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ this_z,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m_" + self._dml_data.z_cols[i_instr]),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
- m_hat['preds'][:, i_instr] = res_cv_predict['preds']
+ m_hat["preds"][:, i_instr] = res_cv_predict["preds"]
- predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds']
- targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets']
- models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models']
+ predictions["ml_m_" + self._dml_data.z_cols[i_instr]] = res_cv_predict["preds"]
+ targets["ml_m_" + self._dml_data.z_cols[i_instr]] = res_cv_predict["targets"]
+ models["ml_m_" + self._dml_data.z_cols[i_instr]] = res_cv_predict["models"]
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
# nuisance r
- if external_predictions['ml_r'] is not None:
- r_hat = {'preds': external_predictions['ml_r'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_r"] is not None:
+ r_hat = {"preds": external_predictions["ml_r"], "targets": None, "models": None}
else:
- r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'],
- return_models=return_models)
- _check_finite_predictions(r_hat['preds'], self._learner['ml_r'], 'ml_r', smpls)
- predictions['ml_r'] = r_hat['preds']
- targets['ml_r'] = r_hat['targets']
- models['ml_r'] = r_hat['models']
-
- g_hat = {'preds': None, 'targets': None, 'models': None}
- if (self._dml_data.n_instr == 1) & ('ml_g' in self._learner):
+ r_hat = _dml_cv_predict(
+ self._learner["ml_r"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_r"),
+ method=self._predict_method["ml_r"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(r_hat["preds"], self._learner["ml_r"], "ml_r", smpls)
+ predictions["ml_r"] = r_hat["preds"]
+ targets["ml_r"] = r_hat["targets"]
+ models["ml_r"] = r_hat["models"]
+
+ g_hat = {"preds": None, "targets": None, "models": None}
+ if (self._dml_data.n_instr == 1) & ("ml_g" in self._learner):
# an estimate of g is obtained for the IV-type score and callable scores
# get an initial estimate for theta using the partialling out score
- if external_predictions['ml_g'] is not None:
- g_hat = {'preds': external_predictions['ml_g'],
- 'targets': None,
- 'models': None}
+ if external_predictions["ml_g"] is not None:
+ g_hat = {"preds": external_predictions["ml_g"], "targets": None, "models": None}
else:
- psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds'])
- psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds'])
+ psi_a = -np.multiply(d - r_hat["preds"], z - m_hat["preds"])
+ psi_b = np.multiply(z - m_hat["preds"], y - l_hat["preds"])
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
# nuisance g
- g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls)
-
- predictions['ml_g'] = g_hat['preds']
- targets['ml_g'] = g_hat['targets']
- models['ml_g'] = g_hat['models']
- psi_a, psi_b = self._score_elements(y, z, d,
- l_hat['preds'], m_hat['preds'], r_hat['preds'], g_hat['preds'],
- smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- predictions = {'predictions': predictions,
- 'targets': targets,
- 'models': models
- }
+ g_hat = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y - theta_initial * d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat["preds"], self._learner["ml_g"], "ml_g", smpls)
+
+ predictions["ml_g"] = g_hat["preds"]
+ targets["ml_g"] = g_hat["targets"]
+ models["ml_g"] = g_hat["models"]
+ psi_a, psi_b = self._score_elements(y, z, d, l_hat["preds"], m_hat["preds"], r_hat["preds"], g_hat["preds"], smpls)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ predictions = {"predictions": predictions, "targets": targets, "models": models}
return psi_elements, predictions
@@ -411,168 +391,208 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls):
if isinstance(self.score, str):
if self._dml_data.n_instr == 1:
- if self.score == 'partialling out':
+ if self.score == "partialling out":
psi_a = -np.multiply(w_hat, v_hat)
psi_b = np.multiply(v_hat, u_hat)
else:
- assert self.score == 'IV-type'
+ assert self.score == "IV-type"
psi_a = -np.multiply(v_hat, d)
psi_b = np.multiply(v_hat, y - g_hat)
else:
- assert self.score == 'partialling out'
+ assert self.score == "partialling out"
psi_a = -np.multiply(w_hat, r_hat_tilde)
psi_b = np.multiply(r_hat_tilde, u_hat)
else:
assert callable(self.score)
if self._dml_data.n_instr > 1:
- raise NotImplementedError('Callable score not implemented for DoubleMLPLIV.partialX '
- 'with several instruments.')
+ raise NotImplementedError("Callable score not implemented for DoubleMLPLIV.partialX with several instruments.")
else:
assert self._dml_data.n_instr == 1
- psi_a, psi_b = self.score(y=y, z=z, d=d,
- l_hat=l_hat, m_hat=m_hat, r_hat=r_hat, g_hat=g_hat,
- smpls=smpls)
+ psi_a, psi_b = self.score(y=y, z=z, d=d, l_hat=l_hat, m_hat=m_hat, r_hat=r_hat, g_hat=g_hat, smpls=smpls)
return psi_a, psi_b
def _nuisance_est_partial_z(self, smpls, n_jobs_cv, return_models=False):
y = self._dml_data.y
- xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)),
- self._dml_data.d,
- force_all_finite=False)
+ xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False)
# nuisance m
- r_hat = _dml_cv_predict(self._learner['ml_r'], xz, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'],
- return_models=return_models)
- _check_finite_predictions(r_hat['preds'], self._learner['ml_r'], 'ml_r', smpls)
+ r_hat = _dml_cv_predict(
+ self._learner["ml_r"],
+ xz,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_r"),
+ method=self._predict_method["ml_r"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(r_hat["preds"], self._learner["ml_r"], "ml_r", smpls)
if isinstance(self.score, str):
- assert self.score == 'partialling out'
- psi_a = -np.multiply(r_hat['preds'], d)
- psi_b = np.multiply(r_hat['preds'], y)
+ assert self.score == "partialling out"
+ psi_a = -np.multiply(r_hat["preds"], d)
+ psi_b = np.multiply(r_hat["preds"], y)
else:
assert callable(self.score)
- raise NotImplementedError('Callable score not implemented for DoubleMLPLIV.partialZ.')
+ raise NotImplementedError("Callable score not implemented for DoubleMLPLIV.partialZ.")
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_r': r_hat['preds']},
- 'targets': {'ml_r': r_hat['targets']},
- 'models': {'ml_r': r_hat['models']}}
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_r": r_hat["preds"]},
+ "targets": {"ml_r": r_hat["targets"]},
+ "models": {"ml_r": r_hat["models"]},
+ }
return psi_elements, preds
def _nuisance_est_partial_xz(self, smpls, n_jobs_cv, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)),
- self._dml_data.d,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# nuisance l
- l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'],
- return_models=return_models)
- _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls)
+ l_hat = _dml_cv_predict(
+ self._learner["ml_l"],
+ x,
+ y,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_l"),
+ method=self._predict_method["ml_l"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(l_hat["preds"], self._learner["ml_l"], "ml_l", smpls)
# nuisance m
- m_hat = _dml_cv_predict(self._learner['ml_m'], xz, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), return_train_preds=True,
- method=self._predict_method['ml_m'], return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ xz,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ return_train_preds=True,
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
# nuisance r
- m_hat_tilde = _dml_cv_predict(self._learner['ml_r'], x, m_hat['train_preds'], smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'],
- return_models=return_models)
- _check_finite_predictions(m_hat_tilde['preds'], self._learner['ml_r'], 'ml_r', smpls)
+ m_hat_tilde = _dml_cv_predict(
+ self._learner["ml_r"],
+ x,
+ m_hat["train_preds"],
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_r"),
+ method=self._predict_method["ml_r"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat_tilde["preds"], self._learner["ml_r"], "ml_r", smpls)
# compute residuals
- u_hat = y - l_hat['preds']
- w_hat = d - m_hat_tilde['preds']
+ u_hat = y - l_hat["preds"]
+ w_hat = d - m_hat_tilde["preds"]
if isinstance(self.score, str):
- assert self.score == 'partialling out'
- psi_a = -np.multiply(w_hat, (m_hat['preds']-m_hat_tilde['preds']))
- psi_b = np.multiply((m_hat['preds']-m_hat_tilde['preds']), u_hat)
+ assert self.score == "partialling out"
+ psi_a = -np.multiply(w_hat, (m_hat["preds"] - m_hat_tilde["preds"]))
+ psi_b = np.multiply((m_hat["preds"] - m_hat_tilde["preds"]), u_hat)
else:
assert callable(self.score)
- raise NotImplementedError('Callable score not implemented for DoubleMLPLIV.partialXZ.')
-
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_l': l_hat['preds'],
- 'ml_m': m_hat['preds'],
- 'ml_r': m_hat_tilde['preds']},
- 'targets': {'ml_l': l_hat['targets'],
- 'ml_m': m_hat['targets'],
- 'ml_r': m_hat_tilde['targets']},
- 'models': {'ml_l': l_hat['models'],
- 'ml_m': m_hat['models'],
- 'ml_r': m_hat_tilde['models']}
- }
+ raise NotImplementedError("Callable score not implemented for DoubleMLPLIV.partialXZ.")
+
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_l": l_hat["preds"], "ml_m": m_hat["preds"], "ml_r": m_hat_tilde["preds"]},
+ "targets": {"ml_l": l_hat["targets"], "ml_m": m_hat["targets"], "ml_r": m_hat_tilde["targets"]},
+ "models": {"ml_l": l_hat["models"], "ml_m": m_hat["models"], "ml_r": m_hat_tilde["models"]},
+ }
return psi_elements, preds
- def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning_partial_x(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_l': None,
- 'ml_m': None,
- 'ml_r': None,
- 'ml_g': None}
+ scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None, "ml_g": None}
train_inds = [train_index for (train_index, _) in smpls]
- l_tune_res = _dml_tune(y, x, train_inds,
- self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ l_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds,
+ self._learner["ml_l"],
+ param_grids["ml_l"],
+ scoring_methods["ml_l"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
if self._dml_data.n_instr > 1:
# several instruments: 2SLS
m_tune_res = {instr_var: list() for instr_var in self._dml_data.z_cols}
z = self._dml_data.z
for i_instr in range(self._dml_data.n_instr):
- x, this_z = check_X_y(x, z[:, i_instr],
- force_all_finite=False)
- m_tune_res[self._dml_data.z_cols[i_instr]] = _dml_tune(this_z, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'],
- scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode,
- n_iter_randomized_search)
+ x, this_z = check_X_y(x, z[:, i_instr], force_all_finite=False)
+ m_tune_res[self._dml_data.z_cols[i_instr]] = _dml_tune(
+ this_z,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
else:
# one instrument: just identified
- x, z = check_X_y(x, np.ravel(self._dml_data.z),
- force_all_finite=False)
- m_tune_res = _dml_tune(z, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
-
- r_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_r'], param_grids['ml_r'], scoring_methods['ml_r'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
+ m_tune_res = _dml_tune(
+ z,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+
+ r_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring_methods["ml_r"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
l_best_params = [xx.best_params_ for xx in l_tune_res]
r_best_params = [xx.best_params_ for xx in r_tune_res]
if self._dml_data.n_instr > 1:
- params = {'ml_l': l_best_params,
- 'ml_r': r_best_params}
+ params = {"ml_l": l_best_params, "ml_r": r_best_params}
for instr_var in self._dml_data.z_cols:
- params['ml_m_' + instr_var] = [xx.best_params_ for xx in m_tune_res[instr_var]]
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res,
- 'r_tune': r_tune_res}
+ params["ml_m_" + instr_var] = [xx.best_params_ for xx in m_tune_res[instr_var]]
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res, "r_tune": r_tune_res}
else:
m_best_params = [xx.best_params_ for xx in m_tune_res]
# an ML model for g is obtained for the IV-type score and callable scores
- if 'ml_g' in self._learner:
+ if "ml_g" in self._learner:
# construct an initial theta estimate from the tuned models using the partialling out score
l_hat = np.full_like(y, np.nan)
m_hat = np.full_like(z, np.nan)
@@ -584,110 +604,131 @@ def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_fold
psi_a = -np.multiply(d - r_hat, z - m_hat)
psi_b = np.multiply(z - m_hat, y - l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
- g_tune_res = _dml_tune(y - theta_initial * d, x, train_inds,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g_tune_res = _dml_tune(
+ y - theta_initial * d,
+ x,
+ train_inds,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g_best_params = [xx.best_params_ for xx in g_tune_res]
- params = {'ml_l': l_best_params,
- 'ml_m': m_best_params,
- 'ml_r': r_best_params,
- 'ml_g': g_best_params}
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res,
- 'r_tune': r_tune_res,
- 'g_tune': g_tune_res}
+ params = {"ml_l": l_best_params, "ml_m": m_best_params, "ml_r": r_best_params, "ml_g": g_best_params}
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res, "r_tune": r_tune_res, "g_tune": g_tune_res}
else:
- params = {'ml_l': l_best_params,
- 'ml_m': m_best_params,
- 'ml_r': r_best_params}
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res,
- 'r_tune': r_tune_res}
+ params = {"ml_l": l_best_params, "ml_m": m_best_params, "ml_r": r_best_params}
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res, "r_tune": r_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
- def _nuisance_tuning_partial_z(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)),
- self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning_partial_z(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_r': None}
+ scoring_methods = {"ml_r": None}
train_inds = [train_index for (train_index, _) in smpls]
- m_tune_res = _dml_tune(d, xz, train_inds,
- self._learner['ml_r'], param_grids['ml_r'], scoring_methods['ml_r'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ m_tune_res = _dml_tune(
+ d,
+ xz,
+ train_inds,
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring_methods["ml_r"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
m_best_params = [xx.best_params_ for xx in m_tune_res]
- params = {'ml_r': m_best_params}
+ params = {"ml_r": m_best_params}
- tune_res = {'r_tune': m_tune_res}
+ tune_res = {"r_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
- def _nuisance_tuning_partial_xz(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)),
- self._dml_data.d,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning_partial_xz(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_l': None,
- 'ml_m': None,
- 'ml_r': None}
+ scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None}
train_inds = [train_index for (train_index, _) in smpls]
- l_tune_res = _dml_tune(y, x, train_inds,
- self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- m_tune_res = _dml_tune(d, xz, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ l_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds,
+ self._learner["ml_l"],
+ param_grids["ml_l"],
+ scoring_methods["ml_l"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ m_tune_res = _dml_tune(
+ d,
+ xz,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
r_tune_res = list()
for idx, (train_index, _) in enumerate(smpls):
m_hat = m_tune_res[idx].predict(xz[train_index, :])
r_tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True)
- if search_mode == 'grid_search':
- r_grid_search = GridSearchCV(self._learner['ml_r'], param_grids['ml_r'],
- scoring=scoring_methods['ml_r'],
- cv=r_tune_resampling, n_jobs=n_jobs_cv)
+ if search_mode == "grid_search":
+ r_grid_search = GridSearchCV(
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring=scoring_methods["ml_r"],
+ cv=r_tune_resampling,
+ n_jobs=n_jobs_cv,
+ )
else:
- assert search_mode == 'randomized_search'
- r_grid_search = RandomizedSearchCV(self._learner['ml_r'], param_grids['ml_r'],
- scoring=scoring_methods['ml_r'],
- cv=r_tune_resampling, n_jobs=n_jobs_cv,
- n_iter=n_iter_randomized_search)
+ assert search_mode == "randomized_search"
+ r_grid_search = RandomizedSearchCV(
+ self._learner["ml_r"],
+ param_grids["ml_r"],
+ scoring=scoring_methods["ml_r"],
+ cv=r_tune_resampling,
+ n_jobs=n_jobs_cv,
+ n_iter=n_iter_randomized_search,
+ )
r_tune_res.append(r_grid_search.fit(x[train_index, :], m_hat))
l_best_params = [xx.best_params_ for xx in l_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
r_best_params = [xx.best_params_ for xx in r_tune_res]
- params = {'ml_l': l_best_params,
- 'ml_m': m_best_params,
- 'ml_r': r_best_params}
+ params = {"ml_l": l_best_params, "ml_m": m_best_params, "ml_r": r_best_params}
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res,
- 'r_tune': r_tune_res}
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res, "r_tune": r_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
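Before moving on to `plr.py`, a quick orientation sketch of how the class reformatted above is typically used. The constructor keywords (`ml_l`, `ml_m`, `ml_r`) mirror the `__init__` in this diff; the dataset helper `make_pliv_CHS2015` is the package's simulated PLIV design, and its arguments here are illustrative.

```python
# Hedged usage sketch: fit DoubleMLPLIV with the default "partialling out" score.
import numpy as np
import doubleml as dml
from doubleml.datasets import make_pliv_CHS2015
from sklearn.ensemble import RandomForestRegressor

np.random.seed(42)
obj_dml_data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=20, dim_z=1)
dml_pliv = dml.DoubleMLPLIV(
    obj_dml_data,
    ml_l=RandomForestRegressor(random_state=42),  # nuisance E[Y | X]
    ml_m=RandomForestRegressor(random_state=42),  # nuisance E[Z | X]
    ml_r=RandomForestRegressor(random_state=42),  # nuisance E[D | X]
    n_folds=5,
    score="partialling out",
)
dml_pliv.fit()
print(dml_pliv.summary)
```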
diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py
index 3f0a26eaf..79ce6c5a7 100644
--- a/doubleml/plm/plr.py
+++ b/doubleml/plm/plr.py
@@ -1,17 +1,16 @@
+import warnings
+
import numpy as np
import pandas as pd
-from sklearn.utils import check_X_y
from sklearn.base import clone
-
-import warnings
+from sklearn.utils import check_X_y
from ..double_ml import DoubleML
from ..double_ml_data import DoubleMLData
from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils.blp import DoubleMLBLP
-
+from ..utils._checks import _check_binary_predictions, _check_finite_predictions, _check_is_propensity, _check_score
from ..utils._estimation import _dml_cv_predict, _dml_tune
-from ..utils._checks import _check_score, _check_finite_predictions, _check_is_propensity, _check_binary_predictions
+from ..utils.blp import DoubleMLBLP
class DoubleMLPLR(LinearScoreMixin, DoubleML):
@@ -89,146 +88,147 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML):
and :math:`\\zeta` and :math:`V` are stochastic errors.
"""
- def __init__(self,
- obj_dml_data,
- ml_l,
- ml_m,
- ml_g=None,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self, obj_dml_data, ml_l, ml_m, ml_g=None, n_folds=5, n_rep=1, score="partialling out", draw_sample_splitting=True
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
- valid_scores = ['IV-type', 'partialling out']
+ valid_scores = ["IV-type", "partialling out"]
_check_score(self.score, valid_scores, allow_callable=True)
- _ = self._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
- ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=True)
- self._learner = {'ml_l': ml_l, 'ml_m': ml_m}
+ _ = self._check_learner(ml_l, "ml_l", regressor=True, classifier=False)
+ ml_m_is_classifier = self._check_learner(ml_m, "ml_m", regressor=True, classifier=True)
+ self._learner = {"ml_l": ml_l, "ml_m": ml_m}
if ml_g is not None:
- if (isinstance(self.score, str) & (self.score == 'IV-type')) | callable(self.score):
- _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
- self._learner['ml_g'] = ml_g
+ if (isinstance(self.score, str) & (self.score == "IV-type")) | callable(self.score):
+ _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False)
+ self._learner["ml_g"] = ml_g
else:
- assert (isinstance(self.score, str) & (self.score == 'partialling out'))
- warnings.warn(('A learner ml_g has been provided for score = "partialling out" but will be ignored. "'
- 'A learner ml_g is not required for estimation.'))
- elif isinstance(self.score, str) & (self.score == 'IV-type'):
- warnings.warn(("For score = 'IV-type', learners ml_l and ml_g should be specified. "
- "Set ml_g = clone(ml_l)."))
- self._learner['ml_g'] = clone(ml_l)
-
- self._predict_method = {'ml_l': 'predict'}
- if 'ml_g' in self._learner:
- self._predict_method['ml_g'] = 'predict'
+ assert isinstance(self.score, str) & (self.score == "partialling out")
+ warnings.warn(
+ (
+ 'A learner ml_g has been provided for score = "partialling out" but will be ignored. '
+ "A learner ml_g is not required for estimation."
+ )
+ )
+ elif isinstance(self.score, str) & (self.score == "IV-type"):
+ warnings.warn("For score = 'IV-type', learners ml_l and ml_g should be specified. Set ml_g = clone(ml_l).")
+ self._learner["ml_g"] = clone(ml_l)
+
+ self._predict_method = {"ml_l": "predict"}
+ if "ml_g" in self._learner:
+ self._predict_method["ml_g"] = "predict"
if ml_m_is_classifier:
if self._dml_data.binary_treats.all():
- self._predict_method['ml_m'] = 'predict_proba'
+ self._predict_method["ml_m"] = "predict_proba"
else:
- raise ValueError(f'The ml_m learner {str(ml_m)} was identified as classifier '
- 'but at least one treatment variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_m learner {str(ml_m)} was identified as classifier "
+ "but at least one treatment variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method['ml_m'] = 'predict'
+ self._predict_method["ml_m"] = "predict"
self._initialize_ml_nuisance_params()
self._sensitivity_implemented = True
self._external_predictions_implemented = True
def _initialize_ml_nuisance_params(self):
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in self._learner}
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner}
def _check_data(self, obj_dml_data):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR."
+ )
return
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
- m_external = external_predictions['ml_m'] is not None
- l_external = external_predictions['ml_l'] is not None
- if 'ml_g' in self._learner:
- g_external = external_predictions['ml_g'] is not None
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+ m_external = external_predictions["ml_m"] is not None
+ l_external = external_predictions["ml_l"] is not None
+ if "ml_g" in self._learner:
+ g_external = external_predictions["ml_g"] is not None
else:
g_external = False
# nuisance l
if l_external:
- l_hat = {'preds': external_predictions['ml_l'],
- 'targets': None,
- 'models': None}
+ l_hat = {"preds": external_predictions["ml_l"], "targets": None, "models": None}
elif self._score == "IV-type" and g_external:
- l_hat = {'preds': None,
- 'targets': None,
- 'models': None}
+ l_hat = {"preds": None, "targets": None, "models": None}
else:
- l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'],
- return_models=return_models)
- _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls)
+ l_hat = _dml_cv_predict(
+ self._learner["ml_l"],
+ x,
+ y,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_l"),
+ method=self._predict_method["ml_l"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(l_hat["preds"], self._learner["ml_l"], "ml_l", smpls)
# nuisance m
if m_external:
- m_hat = {'preds': external_predictions['ml_m'],
- 'targets': None,
- 'models': None}
+ m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None}
else:
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'],
- return_models=return_models)
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
- if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True):
- _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+ if self._check_learner(self._learner["ml_m"], "ml_m", regressor=True, classifier=True):
+ _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
if self._dml_data.binary_treats[self._dml_data.d_cols[self._i_treat]]:
- _check_binary_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', self._dml_data.d_cols[self._i_treat])
+ _check_binary_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", self._dml_data.d_cols[self._i_treat])
# an estimate of g is obtained for the IV-type score and callable scores
- g_hat = {'preds': None, 'targets': None, 'models': None}
- if 'ml_g' in self._learner:
+ g_hat = {"preds": None, "targets": None, "models": None}
+ if "ml_g" in self._learner:
# nuisance g
if g_external:
- g_hat = {'preds': external_predictions['ml_g'],
- 'targets': None,
- 'models': None}
+ g_hat = {"preds": external_predictions["ml_g"], "targets": None, "models": None}
else:
# get an initial estimate for theta using the partialling out score
- psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds'])
- psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds'])
+ psi_a = -np.multiply(d - m_hat["preds"], d - m_hat["preds"])
+ psi_b = np.multiply(d - m_hat["preds"], y - l_hat["preds"])
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
- g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'],
- return_models=return_models)
- _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls)
-
- psi_a, psi_b = self._score_elements(y, d, l_hat['preds'], m_hat['preds'], g_hat['preds'], smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_l': l_hat['preds'],
- 'ml_m': m_hat['preds'],
- 'ml_g': g_hat['preds']},
- 'targets': {'ml_l': l_hat['targets'],
- 'ml_m': m_hat['targets'],
- 'ml_g': g_hat['targets']},
- 'models': {'ml_l': l_hat['models'],
- 'ml_m': m_hat['models'],
- 'ml_g': g_hat['models']}}
+ g_hat = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y - theta_initial * d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g"),
+ method=self._predict_method["ml_g"],
+ return_models=return_models,
+ )
+ _check_finite_predictions(g_hat["preds"], self._learner["ml_g"], "ml_g", smpls)
+
+ psi_a, psi_b = self._score_elements(y, d, l_hat["preds"], m_hat["preds"], g_hat["preds"], smpls)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_l": l_hat["preds"], "ml_m": m_hat["preds"], "ml_g": g_hat["preds"]},
+ "targets": {"ml_l": l_hat["targets"], "ml_m": m_hat["targets"], "ml_g": g_hat["targets"]},
+ "models": {"ml_l": l_hat["models"], "ml_m": m_hat["models"], "ml_g": g_hat["models"]},
+ }
return psi_elements, preds
@@ -237,19 +237,17 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls):
v_hat = d - m_hat
if isinstance(self.score, str):
- if self.score == 'IV-type':
- psi_a = - np.multiply(v_hat, d)
+ if self.score == "IV-type":
+ psi_a = -np.multiply(v_hat, d)
psi_b = np.multiply(v_hat, y - g_hat)
else:
- assert self.score == 'partialling out'
+ assert self.score == "partialling out"
u_hat = y - l_hat
psi_a = -np.multiply(v_hat, v_hat)
psi_b = np.multiply(v_hat, u_hat)
else:
assert callable(self.score)
- psi_a, psi_b = self.score(y=y, d=d,
- l_hat=l_hat, m_hat=m_hat, g_hat=g_hat,
- smpls=smpls)
+ psi_a, psi_b = self.score(y=y, d=d, l_hat=l_hat, m_hat=m_hat, g_hat=g_hat, smpls=smpls)
return psi_a, psi_b
@@ -258,15 +256,15 @@ def _sensitivity_element_est(self, preds):
y = self._dml_data.y
d = self._dml_data.d
- m_hat = preds['predictions']['ml_m']
+ m_hat = preds["predictions"]["ml_m"]
theta = self.all_coef[self._i_treat, self._i_rep]
- if self.score == 'partialling out':
- l_hat = preds['predictions']['ml_l']
- sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d-m_hat))
+ if self.score == "partialling out":
+ l_hat = preds["predictions"]["ml_l"]
+ sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d - m_hat))
else:
- assert self.score == 'IV-type'
- g_hat = preds['predictions']['ml_g']
+ assert self.score == "IV-type"
+ g_hat = preds["predictions"]["ml_g"]
sigma2_score_element = np.square(y - g_hat - np.multiply(theta, d))
sigma2 = np.mean(sigma2_score_element)
@@ -277,39 +275,55 @@ def _sensitivity_element_est(self, preds):
psi_nu2 = nu2 - np.multiply(np.square(treatment_residual), np.square(nu2))
rr = np.multiply(treatment_residual, nu2)
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2,
- 'riesz_rep': rr,
- }
+ element_dict = {
+ "sigma2": sigma2,
+ "nu2": nu2,
+ "psi_sigma2": psi_sigma2,
+ "psi_nu2": psi_nu2,
+ "riesz_rep": rr,
+ }
return element_dict
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
if scoring_methods is None:
- scoring_methods = {'ml_l': None,
- 'ml_m': None,
- 'ml_g': None}
+ scoring_methods = {"ml_l": None, "ml_m": None, "ml_g": None}
train_inds = [train_index for (train_index, _) in smpls]
- l_tune_res = _dml_tune(y, x, train_inds,
- self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
- m_tune_res = _dml_tune(d, x, train_inds,
- self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ l_tune_res = _dml_tune(
+ y,
+ x,
+ train_inds,
+ self._learner["ml_l"],
+ param_grids["ml_l"],
+ scoring_methods["ml_l"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
+ m_tune_res = _dml_tune(
+ d,
+ x,
+ train_inds,
+ self._learner["ml_m"],
+ param_grids["ml_m"],
+ scoring_methods["ml_m"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
l_best_params = [xx.best_params_ for xx in l_tune_res]
m_best_params = [xx.best_params_ for xx in m_tune_res]
# an ML model for g is obtained for the IV-type score and callable scores
- if 'ml_g' in self._learner:
+ if "ml_g" in self._learner:
# construct an initial theta estimate from the tuned models using the partialling out score
l_hat = np.full_like(y, np.nan)
m_hat = np.full_like(d, np.nan)
@@ -319,25 +333,27 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
psi_a = -np.multiply(d - m_hat, d - m_hat)
psi_b = np.multiply(d - m_hat, y - l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
- g_tune_res = _dml_tune(y - theta_initial*d, x, train_inds,
- self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'],
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search)
+ g_tune_res = _dml_tune(
+ y - theta_initial * d,
+ x,
+ train_inds,
+ self._learner["ml_g"],
+ param_grids["ml_g"],
+ scoring_methods["ml_g"],
+ n_folds_tune,
+ n_jobs_cv,
+ search_mode,
+ n_iter_randomized_search,
+ )
g_best_params = [xx.best_params_ for xx in g_tune_res]
- params = {'ml_l': l_best_params,
- 'ml_m': m_best_params,
- 'ml_g': g_best_params}
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res,
- 'g_tune': g_tune_res}
+ params = {"ml_l": l_best_params, "ml_m": m_best_params, "ml_g": g_best_params}
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res, "g_tune": g_tune_res}
else:
- params = {'ml_l': l_best_params,
- 'ml_m': m_best_params}
- tune_res = {'l_tune': l_tune_res,
- 'm_tune': m_tune_res}
+ params = {"ml_l": l_best_params, "ml_m": m_best_params}
+ tune_res = {"l_tune": l_tune_res, "m_tune": m_tune_res}
- res = {'params': params,
- 'tune_res': tune_res}
+ res = {"params": params, "tune_res": tune_res}
return res
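# A minimal sketch of the theta_initial step used above when 'ml_g' is tuned:
# with residualized outcome and treatment, the partialling-out estimate is a
# plain moment ratio. Synthetic inputs, no tuning machinery.
import numpy as np

rng = np.random.default_rng(0)
n = 500
d = rng.normal(size=n)
y = 1.0 * d + rng.normal(size=n)
l_hat = np.zeros(n)  # stand-in for cross-fitted E[Y | X]
m_hat = np.zeros(n)  # stand-in for cross-fitted E[D | X]

psi_a = -np.multiply(d - m_hat, d - m_hat)
psi_b = np.multiply(d - m_hat, y - l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
# ml_g is then tuned on the pseudo-outcome y - theta_initial * d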
@@ -364,11 +380,11 @@ def cate(self, basis, is_gate=False, **kwargs):
Best linear Predictor model.
"""
if self._dml_data.n_treat > 1:
- raise NotImplementedError('Only implemented for single treatment. ' +
- f'Number of treatments is {str(self._dml_data.n_treat)}.')
+ raise NotImplementedError(
+ "Only implemented for single treatment. " + f"Number of treatments is {str(self._dml_data.n_treat)}."
+ )
if self.n_rep != 1:
- raise NotImplementedError('Only implemented for one repetition. ' +
- f'Number of repetitions is {str(self.n_rep)}.')
+ raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
Y_tilde, D_tilde = self._partial_out()
@@ -402,17 +418,18 @@ def gate(self, groups, **kwargs):
"""
if not isinstance(groups, pd.DataFrame):
- raise TypeError('Groups must be of DataFrame type. '
- f'Groups of type {str(type(groups))} was passed.')
+ raise TypeError(f"Groups must be of DataFrame type. Groups of type {str(type(groups))} was passed.")
if not all(groups.dtypes == bool) or all(groups.dtypes == int):
if groups.shape[1] == 1:
- groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
+ groups = pd.get_dummies(groups, prefix="Group", prefix_sep="_")
else:
- raise TypeError('Columns of groups must be of bool type or int type (dummy coded). '
- 'Alternatively, groups should only contain one column.')
+ raise TypeError(
+ "Columns of groups must be of bool type or int type (dummy coded). "
+ "Alternatively, groups should only contain one column."
+ )
if any(groups.sum(0) <= 5):
- warnings.warn('At least one group effect is estimated with less than 6 observations.')
+ warnings.warn("At least one group effect is estimated with less than 6 observations.")
model = self.cate(groups, is_gate=True, **kwargs)
return model
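# A hypothetical usage sketch for gate(): a single column is dummy-coded via
# pd.get_dummies as shown above. `dml_plr` stands for an already fitted
# DoubleMLPLR object with stored predictions and is assumed here.
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
groups = pd.DataFrame({"Group": rng.normal(size=200) > 0})  # one bool column
# gate_model = dml_plr.gate(groups)
# print(gate_model.confint())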
@@ -430,7 +447,7 @@ def _partial_out(self):
The residual of the regression of D on X.
"""
if self.predictions is None:
- raise ValueError('predictions are None. Call .fit(store_predictions=True) to store the predictions.')
+ raise ValueError("predictions are None. Call .fit(store_predictions=True) to store the predictions.")
y = self._dml_data.y.reshape(-1, 1)
d = self._dml_data.d.reshape(-1, 1)
diff --git a/doubleml/plm/tests/_utils_pliv_manual.py b/doubleml/plm/tests/_utils_pliv_manual.py
index 32ff7cdb6..90b1e6892 100644
--- a/doubleml/plm/tests/_utils_pliv_manual.py
+++ b/doubleml/plm/tests/_utils_pliv_manual.py
@@ -1,13 +1,27 @@
import numpy as np
from sklearn.base import clone
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_pliv(y, x, d, z,
- learner_l, learner_m, learner_r, learner_g, all_smpls, score,
- n_rep=1, l_params=None, m_params=None, r_params=None, g_params=None):
+def fit_pliv(
+ y,
+ x,
+ d,
+ z,
+ learner_l,
+ learner_m,
+ learner_r,
+ learner_g,
+ all_smpls,
+ score,
+ n_rep=1,
+ l_params=None,
+ m_params=None,
+ r_params=None,
+ g_params=None,
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -19,33 +33,38 @@ def fit_pliv(y, x, d, z,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- fit_g = (score == 'IV-type') | callable(score)
- l_hat, m_hat, r_hat, g_hat = fit_nuisance_pliv(y, x, d, z,
- learner_l, learner_m, learner_r, learner_g,
- smpls, fit_g,
- l_params, m_params, r_params, g_params)
+ fit_g = (score == "IV-type") | callable(score)
+ l_hat, m_hat, r_hat, g_hat = fit_nuisance_pliv(
+ y, x, d, z, learner_l, learner_m, learner_r, learner_g, smpls, fit_g, l_params, m_params, r_params, g_params
+ )
all_l_hat.append(l_hat)
all_m_hat.append(m_hat)
all_r_hat.append(r_hat)
all_g_hat.append(g_hat)
- thetas[i_rep], ses[i_rep] = pliv_dml2(y, x, d, z,
- l_hat, m_hat, r_hat, g_hat,
- smpls, score)
+ thetas[i_rep], ses[i_rep] = pliv_dml2(y, x, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat, 'all_g_hat': all_g_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_l_hat": all_l_hat,
+ "all_m_hat": all_m_hat,
+ "all_r_hat": all_r_hat,
+ "all_g_hat": all_g_hat,
+ }
return res
-def fit_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, fit_g=True,
- l_params=None, m_params=None, r_params=None, g_params=None):
+def fit_nuisance_pliv(
+ y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, fit_g=True, l_params=None, m_params=None, r_params=None, g_params=None
+):
l_hat = fit_predict(y, x, ml_l, l_params, smpls)
m_hat = fit_predict(z, x, ml_m, m_params, smpls)
@@ -53,8 +72,7 @@ def fit_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, fit_g=True,
r_hat = fit_predict(d, x, ml_r, r_params, smpls)
if fit_g:
- y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, _ = compute_pliv_residuals(
- y, d, z, l_hat, m_hat, r_hat, [], smpls)
+ y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, _ = compute_pliv_residuals(y, d, z, l_hat, m_hat, r_hat, [], smpls)
psi_a = -np.multiply(d_minus_r_hat, z_minus_m_hat)
psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
@@ -67,8 +85,23 @@ def fit_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, fit_g=True,
return l_hat, m_hat, r_hat, g_hat
-def tune_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, n_folds_tune,
- param_grid_l, param_grid_m, param_grid_r, param_grid_g, tune_g=True):
+def tune_nuisance_pliv(
+ y,
+ x,
+ d,
+ z,
+ ml_l,
+ ml_m,
+ ml_r,
+ ml_g,
+ smpls,
+ n_folds_tune,
+ param_grid_l,
+ param_grid_m,
+ param_grid_r,
+ param_grid_g,
+ tune_g=True,
+):
l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune)
m_tune_res = tune_grid_search(z, x, ml_m, smpls, param_grid_m, n_folds_tune)
@@ -87,7 +120,7 @@ def tune_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, n_folds_tune,
psi_b = np.multiply(z - m_hat, y - l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
- g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune)
+ g_tune_res = tune_grid_search(y - theta_initial * d, x, ml_g, smpls, param_grid_g, n_folds_tune)
g_best_params = [xx.best_params_ for xx in g_tune_res]
else:
g_best_params = []
@@ -100,10 +133,10 @@ def tune_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, n_folds_tune,
def compute_pliv_residuals(y, d, z, l_hat, m_hat, r_hat, g_hat, smpls):
- y_minus_l_hat = np.full_like(y, np.nan, dtype='float64')
- z_minus_m_hat = np.full_like(y, np.nan, dtype='float64')
- d_minus_r_hat = np.full_like(d, np.nan, dtype='float64')
- y_minus_g_hat = np.full_like(y, np.nan, dtype='float64')
+ y_minus_l_hat = np.full_like(y, np.nan, dtype="float64")
+ z_minus_m_hat = np.full_like(y, np.nan, dtype="float64")
+ d_minus_r_hat = np.full_like(d, np.nan, dtype="float64")
+ y_minus_g_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
y_minus_l_hat[test_index] = y[test_index] - l_hat[idx]
z_minus_m_hat[test_index] = z[test_index] - m_hat[idx]
@@ -117,7 +150,8 @@ def compute_pliv_residuals(y, d, z, l_hat, m_hat, r_hat, g_hat, smpls):
def pliv_dml2(y, x, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score):
n_obs = len(y)
y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals(
- y, d, z, l_hat, m_hat, r_hat, g_hat, smpls)
+ y, d, z, l_hat, m_hat, r_hat, g_hat, smpls
+ )
theta_hat = pliv_orth(y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, d, score)
se = np.sqrt(var_pliv(theta_hat, d, y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, score, n_obs))
@@ -125,30 +159,54 @@ def pliv_dml2(y, x, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score):
def var_pliv(theta, d, y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, score, n_obs):
- if score == 'partialling out':
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat)), 2) * \
- np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_r_hat*theta, z_minus_m_hat), 2))
+ if score == "partialling out":
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat)), 2)
+ * np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat), 2))
+ )
else:
- assert score == 'IV-type'
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(z_minus_m_hat, d)), 2) * \
- np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, z_minus_m_hat), 2))
+ assert score == "IV-type"
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(z_minus_m_hat, d)), 2)
+ * np.mean(np.power(np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat), 2))
+ )
return var
def pliv_orth(y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, d, score):
- if score == 'partialling out':
- res = np.mean(np.multiply(z_minus_m_hat, y_minus_l_hat))/np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat))
+ if score == "partialling out":
+ res = np.mean(np.multiply(z_minus_m_hat, y_minus_l_hat)) / np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat))
else:
- assert score == 'IV-type'
- res = np.mean(np.multiply(z_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(z_minus_m_hat, d))
+ assert score == "IV-type"
+ res = np.mean(np.multiply(z_minus_m_hat, y_minus_g_hat)) / np.mean(np.multiply(z_minus_m_hat, d))
return res
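# A quick numerical check of the partialling-out moment and the sandwich
# variance above on simulated IV data with known theta = 0.5; oracle residuals
# stand in for the cross-fitted nuisances and there are no controls X.
import numpy as np

rng = np.random.default_rng(123)
n = 100_000
z = rng.normal(size=n)
d = 0.8 * z + rng.normal(size=n)
y = 0.5 * d + rng.normal(size=n)

z_minus_m_hat = z - z.mean()
d_minus_r_hat = d - d.mean()
y_minus_l_hat = y - y.mean()
theta_hat = np.mean(np.multiply(z_minus_m_hat, y_minus_l_hat)) / np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat))
psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta_hat, z_minus_m_hat)
J = np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat))
var = np.mean(np.power(psi, 2)) / (n * np.power(J, 2))  # same form as var_pliv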
-def boot_pliv(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_g_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, apply_cross_fitting=True):
+def boot_pliv(
+ y,
+ d,
+ z,
+ thetas,
+ ses,
+ all_l_hat,
+ all_m_hat,
+ all_r_hat,
+ all_g_hat,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ apply_cross_fitting=True,
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -159,8 +217,21 @@ def boot_pliv(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_g_hat,
n_obs = len(test_index)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_pliv_single_split(
- thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_g_hat[i_rep], smpls,
- score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting)
+ thetas[i_rep],
+ y,
+ d,
+ z,
+ all_l_hat[i_rep],
+ all_m_hat[i_rep],
+ all_r_hat[i_rep],
+ all_g_hat[i_rep],
+ smpls,
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -168,30 +239,32 @@ def boot_pliv(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_g_hat,
return boot_t_stat
-def boot_pliv_single_split(theta, y, d, z, l_hat, m_hat, r_hat, g_hat,
- smpls, score, se, weights, n_rep_boot, apply_cross_fitting):
+def boot_pliv_single_split(
+ theta, y, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score, se, weights, n_rep_boot, apply_cross_fitting
+):
y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals(
- y, d, z, l_hat, m_hat, r_hat, g_hat, smpls)
+ y, d, z, l_hat, m_hat, r_hat, g_hat, smpls
+ )
if apply_cross_fitting:
- if score == 'partialling out':
+ if score == "partialling out":
J = np.mean(-np.multiply(z_minus_m_hat, d_minus_r_hat))
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
J = np.mean(-np.multiply(z_minus_m_hat, d))
else:
test_index = smpls[0][1]
- if score == 'partialling out':
+ if score == "partialling out":
J = np.mean(-np.multiply(z_minus_m_hat[test_index], d_minus_r_hat[test_index]))
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
J = np.mean(-np.multiply(z_minus_m_hat[test_index], d[test_index]))
- if score == 'partialling out':
- psi = np.multiply(y_minus_l_hat - d_minus_r_hat*theta, z_minus_m_hat)
+ if score == "partialling out":
+ psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat)
else:
- assert score == 'IV-type'
- psi = np.multiply(y_minus_g_hat - d*theta, z_minus_m_hat)
+ assert score == "IV-type"
+ psi = np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat)
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting)
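# A rough sketch of the multiplier-bootstrap idea behind boot_pliv and
# boot_manual: perturb the score psi with random weights and studentize by J
# and se. This mirrors the idea only; the real helper also handles the sample
# splits and the different weight distributions ('Bayes', 'normal', 'wild').
import numpy as np

rng = np.random.default_rng(7)
n, n_rep_boot = 400, 500
psi = rng.normal(size=n)  # stand-in for the estimated score
J = -1.0  # stand-in for the Jacobian term
se = np.std(psi) / np.sqrt(n)
weights = rng.normal(size=(n_rep_boot, n))  # e.g. 'normal' multipliers
boot_t_stat = (weights @ psi) / (n * np.abs(J) * se)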
diff --git a/doubleml/plm/tests/_utils_pliv_partial_x_manual.py b/doubleml/plm/tests/_utils_pliv_partial_x_manual.py
index 089b317ee..6fa64a14d 100644
--- a/doubleml/plm/tests/_utils_pliv_partial_x_manual.py
+++ b/doubleml/plm/tests/_utils_pliv_partial_x_manual.py
@@ -1,13 +1,13 @@
import numpy as np
from sklearn.linear_model import LinearRegression
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_pliv_partial_x(y, x, d, z,
- learner_l, learner_m, learner_r, all_smpls, score,
- n_rep=1, l_params=None, m_params=None, r_params=None):
+def fit_pliv_partial_x(
+ y, x, d, z, learner_l, learner_m, learner_r, all_smpls, score, n_rep=1, l_params=None, m_params=None, r_params=None
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -18,25 +18,28 @@ def fit_pliv_partial_x(y, x, d, z,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_x(y, x, d, z,
- learner_l, learner_m, learner_r,
- smpls,
- l_params, m_params, r_params)
+ l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_x(
+ y, x, d, z, learner_l, learner_m, learner_r, smpls, l_params, m_params, r_params
+ )
all_l_hat.append(l_hat)
all_m_hat.append(m_hat)
all_r_hat.append(r_hat)
- thetas[i_rep], ses[i_rep] = pliv_partial_x_dml2(y, x, d, z,
- l_hat, m_hat, r_hat,
- smpls, score)
+ thetas[i_rep], ses[i_rep] = pliv_partial_x_dml2(y, x, d, z, l_hat, m_hat, r_hat, smpls, score)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_l_hat": all_l_hat,
+ "all_m_hat": all_m_hat,
+ "all_r_hat": all_r_hat,
+ }
return res
@@ -54,8 +57,8 @@ def fit_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, l_params=No
r_hat = fit_predict(d, x, ml_r, r_params, smpls)
- r_hat_array = np.zeros_like(d, dtype='float64')
- m_hat_array = np.zeros_like(z, dtype='float64')
+ r_hat_array = np.zeros_like(d, dtype="float64")
+ m_hat_array = np.zeros_like(z, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
r_hat_array[test_index] = r_hat[idx]
for i_instr in range(z.shape[1]):
@@ -66,8 +69,7 @@ def fit_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, l_params=No
return l_hat, r_hat, r_hat_tilde
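# A sketch of the partial-X projection that yields r_hat_tilde above: the
# treatment residual is linearly projected on the instrument residuals. Fold
# handling is omitted and the zero nuisance predictions are toy assumptions.
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(5)
n = 5_000
z = rng.normal(size=(n, 3))
d = z @ np.array([0.5, 0.3, 0.2]) + rng.normal(size=n)
m_hat_array = np.zeros((n, 3))  # stand-in for E[Z_j | X]
r_hat_array = np.zeros(n)  # stand-in for E[D | X]
proj = LinearRegression().fit(z - m_hat_array, d - r_hat_array)
r_hat_tilde = proj.predict(z - m_hat_array)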
-def tune_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune,
- param_grid_l, param_grid_m, param_grid_r):
+def tune_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune, param_grid_l, param_grid_m, param_grid_r):
l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune)
m_tune_res = list()
@@ -84,8 +86,8 @@ def tune_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tu
def compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls):
- u_hat = np.full_like(y, np.nan, dtype='float64')
- w_hat = np.full_like(y, np.nan, dtype='float64')
+ u_hat = np.full_like(y, np.nan, dtype="float64")
+ w_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
u_hat[test_index] = y[test_index] - l_hat[idx]
w_hat[test_index] = d[test_index] - r_hat[idx]
@@ -103,30 +105,46 @@ def pliv_partial_x_dml2(y, x, d, z, l_hat, r_hat, r_hat_tilde, smpls, score):
def var_pliv_partial_x(theta, d, u_hat, w_hat, r_hat_tilde, score, n_obs):
- assert score == 'partialling out'
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(r_hat_tilde, w_hat)), 2) * \
- np.mean(np.power(np.multiply(u_hat - w_hat*theta, r_hat_tilde), 2))
+ assert score == "partialling out"
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(r_hat_tilde, w_hat)), 2)
+ * np.mean(np.power(np.multiply(u_hat - w_hat * theta, r_hat_tilde), 2))
+ )
return var
def pliv_partial_x_orth(u_hat, w_hat, r_hat_tilde, d, score):
- assert score == 'partialling out'
- res = np.mean(np.multiply(r_hat_tilde, u_hat))/np.mean(np.multiply(r_hat_tilde, w_hat))
+ assert score == "partialling out"
+ res = np.mean(np.multiply(r_hat_tilde, u_hat)) / np.mean(np.multiply(r_hat_tilde, w_hat))
return res
-def boot_pliv_partial_x(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1):
+def boot_pliv_partial_x(
+ y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
n_obs = len(y)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_pliv_partial_x_single_split(
- thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep],
- score, ses[i_rep], weights, n_rep_boot)
+ thetas[i_rep],
+ y,
+ d,
+ z,
+ all_l_hat[i_rep],
+ all_m_hat[i_rep],
+ all_r_hat[i_rep],
+ all_smpls[i_rep],
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -134,14 +152,13 @@ def boot_pliv_partial_x(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat,
return boot_t_stat
-def boot_pliv_partial_x_single_split(theta, y, d, z, l_hat, r_hat, r_hat_tilde,
- smpls, score, se, weights, n_rep_boot):
- assert score == 'partialling out'
+def boot_pliv_partial_x_single_split(theta, y, d, z, l_hat, r_hat, r_hat_tilde, smpls, score, se, weights, n_rep_boot):
+ assert score == "partialling out"
u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls)
J = np.mean(-np.multiply(r_hat_tilde, w_hat))
- psi = np.multiply(u_hat - w_hat*theta, r_hat_tilde)
+ psi = np.multiply(u_hat - w_hat * theta, r_hat_tilde)
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot)
diff --git a/doubleml/plm/tests/_utils_pliv_partial_xz_manual.py b/doubleml/plm/tests/_utils_pliv_partial_xz_manual.py
index b82c84df7..4f5419c00 100644
--- a/doubleml/plm/tests/_utils_pliv_partial_xz_manual.py
+++ b/doubleml/plm/tests/_utils_pliv_partial_xz_manual.py
@@ -1,13 +1,13 @@
import numpy as np
-from sklearn.model_selection import KFold, GridSearchCV
+from sklearn.model_selection import GridSearchCV, KFold
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_pliv_partial_xz(y, x, d, z,
- learner_l, learner_m, learner_r, all_smpls, score,
- n_rep=1, l_params=None, m_params=None, r_params=None):
+def fit_pliv_partial_xz(
+ y, x, d, z, learner_l, learner_m, learner_r, all_smpls, score, n_rep=1, l_params=None, m_params=None, r_params=None
+):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -18,25 +18,28 @@ def fit_pliv_partial_xz(y, x, d, z,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_xz(y, x, d, z,
- learner_l, learner_m, learner_r,
- smpls,
- l_params, m_params, r_params)
+ l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_xz(
+ y, x, d, z, learner_l, learner_m, learner_r, smpls, l_params, m_params, r_params
+ )
all_l_hat.append(l_hat)
all_m_hat.append(m_hat)
all_r_hat.append(r_hat)
- thetas[i_rep], ses[i_rep] = pliv_partial_xz_dml2(y, x, d, z,
- l_hat, m_hat, r_hat,
- smpls, score)
+ thetas[i_rep], ses[i_rep] = pliv_partial_xz_dml2(y, x, d, z, l_hat, m_hat, r_hat, smpls, score)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_l_hat": all_l_hat,
+ "all_m_hat": all_m_hat,
+ "all_r_hat": all_r_hat,
+ }
return res
@@ -63,8 +66,7 @@ def fit_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, l_params=N
return l_hat, m_hat, m_hat_tilde
-def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune,
- param_grid_l, param_grid_m, param_grid_r):
+def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune, param_grid_l, param_grid_m, param_grid_r):
l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune)
xz = np.hstack((x, z))
@@ -74,8 +76,7 @@ def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_t
for idx, (train_index, _) in enumerate(smpls):
m_hat = m_tune_res[idx].predict(xz[train_index, :])
r_tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True)
- r_grid_search = GridSearchCV(ml_r, param_grid_r,
- cv=r_tune_resampling)
+ r_grid_search = GridSearchCV(ml_r, param_grid_r, cv=r_tune_resampling)
r_tune_res[idx] = r_grid_search.fit(x[train_index, :], m_hat)
l_best_params = [xx.best_params_ for xx in l_tune_res]
@@ -86,9 +87,9 @@ def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_t
def compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls):
- u_hat = np.full_like(y, np.nan, dtype='float64')
- v_hat = np.full_like(y, np.nan, dtype='float64')
- w_hat = np.full_like(y, np.nan, dtype='float64')
+ u_hat = np.full_like(y, np.nan, dtype="float64")
+ v_hat = np.full_like(y, np.nan, dtype="float64")
+ w_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
u_hat[test_index] = y[test_index] - l_hat[idx]
v_hat[test_index] = m_hat[idx] - m_hat_tilde[idx]
@@ -107,30 +108,46 @@ def pliv_partial_xz_dml2(y, x, d, z, l_hat, m_hat, m_hat_tilde, smpls, score):
def var_pliv_partial_xz(theta, d, u_hat, v_hat, w_hat, score, n_obs):
- assert score == 'partialling out'
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(v_hat, w_hat)), 2) * \
- np.mean(np.power(np.multiply(u_hat - w_hat*theta, v_hat), 2))
+ assert score == "partialling out"
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(v_hat, w_hat)), 2)
+ * np.mean(np.power(np.multiply(u_hat - w_hat * theta, v_hat), 2))
+ )
return var
def pliv_partial_xz_orth(u_hat, v_hat, w_hat, d, score):
- assert score == 'partialling out'
- res = np.mean(np.multiply(v_hat, u_hat))/np.mean(np.multiply(v_hat, w_hat))
+ assert score == "partialling out"
+ res = np.mean(np.multiply(v_hat, u_hat)) / np.mean(np.multiply(v_hat, w_hat))
return res
-def boot_pliv_partial_xz(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1):
+def boot_pliv_partial_xz(
+ y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
n_obs = len(y)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_pliv_partial_xz_single_split(
- thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep],
- score, ses[i_rep], weights, n_rep_boot)
+ thetas[i_rep],
+ y,
+ d,
+ z,
+ all_l_hat[i_rep],
+ all_m_hat[i_rep],
+ all_r_hat[i_rep],
+ all_smpls[i_rep],
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -138,14 +155,13 @@ def boot_pliv_partial_xz(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat,
return boot_t_stat
-def boot_pliv_partial_xz_single_split(theta, y, d, z, l_hat, m_hat, m_hat_tilde,
- smpls, score, se, weights, n_rep_boot):
- assert score == 'partialling out'
+def boot_pliv_partial_xz_single_split(theta, y, d, z, l_hat, m_hat, m_hat_tilde, smpls, score, se, weights, n_rep_boot):
+ assert score == "partialling out"
u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls)
J = np.mean(-np.multiply(v_hat, w_hat))
- psi = np.multiply(u_hat - w_hat*theta, v_hat)
+ psi = np.multiply(u_hat - w_hat * theta, v_hat)
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot)
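# A minimal sketch of the partial-XZ two-stage nuisance used above: first
# predict D from (X, Z), then project that fit back onto X alone; their
# difference v_hat acts as the instrument in the score. Toy linear data.
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(9)
n = 5_000
x = rng.normal(size=(n, 3))
z = rng.normal(size=(n, 2))
d = x @ np.ones(3) + z @ np.ones(2) + rng.normal(size=n)

xz = np.hstack((x, z))
m_hat = LinearRegression().fit(xz, d).predict(xz)  # E[D | X, Z]
m_hat_tilde = LinearRegression().fit(x, m_hat).predict(x)  # projection on X
v_hat = m_hat - m_hat_tilde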
diff --git a/doubleml/plm/tests/_utils_pliv_partial_z_manual.py b/doubleml/plm/tests/_utils_pliv_partial_z_manual.py
index 631905d67..839ddeea9 100644
--- a/doubleml/plm/tests/_utils_pliv_partial_z_manual.py
+++ b/doubleml/plm/tests/_utils_pliv_partial_z_manual.py
@@ -1,12 +1,10 @@
import numpy as np
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_pliv_partial_z(y, x, d, z,
- learner_r, all_smpls, score,
- n_rep=1, r_params=None):
+def fit_pliv_partial_z(y, x, d, z, learner_r, all_smpls, score, n_rep=1, r_params=None):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -15,23 +13,16 @@ def fit_pliv_partial_z(y, x, d, z,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
- r_hat = fit_nuisance_pliv_partial_z(y, x, d, z,
- learner_r,
- smpls,
- r_params)
+ r_hat = fit_nuisance_pliv_partial_z(y, x, d, z, learner_r, smpls, r_params)
all_r_hat.append(r_hat)
- thetas[i_rep], ses[i_rep] = pliv_partial_z_dml2(y, x, d, z,
- r_hat,
- smpls, score)
+ thetas[i_rep], ses[i_rep] = pliv_partial_z_dml2(y, x, d, z, r_hat, smpls, score)
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_r_hat': all_r_hat}
+ res = {"theta": theta, "se": se, "thetas": thetas, "ses": ses, "all_r_hat": all_r_hat}
return res
@@ -53,7 +44,7 @@ def tune_nuisance_pliv_partial_z(y, x, d, z, ml_r, smpls, n_folds_tune, param_gr
def compute_pliv_partial_z_residuals(y, r_hat, smpls):
- r_hat_array = np.full_like(y, np.nan, dtype='float64')
+ r_hat_array = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
r_hat_array[test_index] = r_hat[idx]
return r_hat_array
@@ -69,30 +60,27 @@ def pliv_partial_z_dml2(y, x, d, z, r_hat, smpls, score):
def var_pliv_partial_z(theta, r_hat, y, d, score, n_obs):
- assert score == 'partialling out'
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(r_hat, d)), 2) * \
- np.mean(np.power(np.multiply(y - d*theta, r_hat), 2))
+ assert score == "partialling out"
+ var = 1 / n_obs * 1 / np.power(np.mean(np.multiply(r_hat, d)), 2) * np.mean(np.power(np.multiply(y - d * theta, r_hat), 2))
return var
def pliv_partial_z_orth(r_hat, y, d, score):
- assert score == 'partialling out'
- res = np.mean(np.multiply(r_hat, y))/np.mean(np.multiply(r_hat, d))
+ assert score == "partialling out"
+ res = np.mean(np.multiply(r_hat, y)) / np.mean(np.multiply(r_hat, d))
return res
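# A toy consistency check for the partial-Z ratio above: with r_hat the
# oracle E[D | X, Z] and no direct X effect on Y in this particular DGP, the
# simple ratio recovers theta = 0.5.
import numpy as np

rng = np.random.default_rng(21)
n = 50_000
x = rng.normal(size=n)
z = rng.normal(size=n)
d = x + z + rng.normal(size=n)
y = 0.5 * d + rng.normal(size=n)
r_hat = x + z
theta_hat = np.mean(np.multiply(r_hat, y)) / np.mean(np.multiply(r_hat, d))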
-def boot_pliv_partial_z(y, d, z, thetas, ses, all_r_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1):
+def boot_pliv_partial_z(y, d, z, thetas, ses, all_r_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1):
all_boot_t_stat = list()
for i_rep in range(n_rep):
n_obs = len(y)
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_pliv_partial_z_single_split(
- thetas[i_rep], y, d, z, all_r_hat[i_rep], all_smpls[i_rep],
- score, ses[i_rep], weights, n_rep_boot)
+ thetas[i_rep], y, d, z, all_r_hat[i_rep], all_smpls[i_rep], score, ses[i_rep], weights, n_rep_boot
+ )
all_boot_t_stat.append(boot_t_stat)
boot_t_stat = np.hstack(all_boot_t_stat)
@@ -100,14 +88,13 @@ def boot_pliv_partial_z(y, d, z, thetas, ses, all_r_hat,
return boot_t_stat
-def boot_pliv_partial_z_single_split(theta, y, d, z, r_hat,
- smpls, score, se, weights, n_rep_boot):
- assert score == 'partialling out'
+def boot_pliv_partial_z_single_split(theta, y, d, z, r_hat, smpls, score, se, weights, n_rep_boot):
+ assert score == "partialling out"
r_hat_array = compute_pliv_partial_z_residuals(y, r_hat, smpls)
J = np.mean(-np.multiply(r_hat_array, d))
- psi = np.multiply(y - d*theta, r_hat_array)
+ psi = np.multiply(y - d * theta, r_hat_array)
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot)
diff --git a/doubleml/plm/tests/_utils_plr_manual.py b/doubleml/plm/tests/_utils_plr_manual.py
index 745de5219..41572289c 100644
--- a/doubleml/plm/tests/_utils_plr_manual.py
+++ b/doubleml/plm/tests/_utils_plr_manual.py
@@ -2,13 +2,25 @@
import scipy
from sklearn.base import clone, is_classifier
-from ...tests._utils_boot import boot_manual, draw_weights
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
+from ...tests._utils_boot import boot_manual, draw_weights
-def fit_plr_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, score,
- n_rep=1, l_params=None, m_params=None, g_params=None,
- use_other_treat_as_covariate=True):
+def fit_plr_multitreat(
+ y,
+ x,
+ d,
+ learner_l,
+ learner_m,
+ learner_g,
+ all_smpls,
+ score,
+ n_rep=1,
+ l_params=None,
+ m_params=None,
+ g_params=None,
+ use_other_treat_as_covariate=True,
+):
n_obs = len(y)
n_d = d.shape[1]
@@ -32,10 +44,8 @@ def fit_plr_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, scor
xd = x
l_hat, m_hat, g_hat, thetas_this_rep[i_d], ses_this_rep[i_d] = fit_plr_single_split(
- y, xd, d[:, i_d],
- learner_l, learner_m, learner_g,
- smpls, score,
- l_params, m_params, g_params)
+ y, xd, d[:, i_d], learner_l, learner_m, learner_g, smpls, score, l_params, m_params, g_params
+ )
all_l_hat_this_rep.append(l_hat)
all_m_hat_this_rep.append(m_hat)
all_g_hat_this_rep.append(g_hat)
@@ -54,15 +64,20 @@ def fit_plr_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, scor
theta[i_d] = np.median(theta_vec)
se[i_d] = np.sqrt(np.median(np.power(se_vec, 2) * n_obs + np.power(theta_vec - theta[i_d], 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_l_hat": all_l_hat,
+ "all_m_hat": all_m_hat,
+ "all_g_hat": all_g_hat,
+ }
return res
-def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score,
- n_rep=1, l_params=None, m_params=None, g_params=None):
+def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, n_rep=1, l_params=None, m_params=None, g_params=None):
n_obs = len(y)
thetas = np.zeros(n_rep)
@@ -73,10 +88,8 @@ def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score,
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
l_hat, m_hat, g_hat, thetas[i_rep], ses[i_rep] = fit_plr_single_split(
- y, x, d,
- learner_l, learner_m, learner_g,
- smpls, score,
- l_params, m_params, g_params)
+ y, x, d, learner_l, learner_m, learner_g, smpls, score, l_params, m_params, g_params
+ )
all_l_hat.append(l_hat)
all_m_hat.append(m_hat)
all_g_hat.append(g_hat)
@@ -84,35 +97,36 @@ def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score,
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
- res = {'theta': theta, 'se': se,
- 'thetas': thetas, 'ses': ses,
- 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat}
+ res = {
+ "theta": theta,
+ "se": se,
+ "thetas": thetas,
+ "ses": ses,
+ "all_l_hat": all_l_hat,
+ "all_m_hat": all_m_hat,
+ "all_g_hat": all_g_hat,
+ }
return res
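# A minimal sketch of the median aggregation over repetitions used above:
# point estimates are combined by the median, and the squared standard errors
# get an add-on for the spread of the thetas across repetitions.
import numpy as np

thetas = np.array([0.48, 0.52, 0.50, 0.55, 0.47])
ses = np.array([0.05, 0.06, 0.05, 0.07, 0.05])
n_obs = 1000
theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)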
-def fit_plr_single_split(y, x, d, learner_l, learner_m, learner_g, smpls, score,
- l_params=None, m_params=None, g_params=None):
- fit_g = (score == 'IV-type') | callable(score)
+def fit_plr_single_split(y, x, d, learner_l, learner_m, learner_g, smpls, score, l_params=None, m_params=None, g_params=None):
+ fit_g = (score == "IV-type") | callable(score)
if is_classifier(learner_m):
- l_hat, m_hat, g_hat = fit_nuisance_plr_classifier(y, x, d,
- learner_l, learner_m, learner_g,
- smpls, fit_g,
- l_params, m_params, g_params)
+ l_hat, m_hat, g_hat = fit_nuisance_plr_classifier(
+ y, x, d, learner_l, learner_m, learner_g, smpls, fit_g, l_params, m_params, g_params
+ )
else:
- l_hat, m_hat, g_hat = fit_nuisance_plr(y, x, d,
- learner_l, learner_m, learner_g,
- smpls, fit_g,
- l_params, m_params, g_params)
+ l_hat, m_hat, g_hat = fit_nuisance_plr(
+ y, x, d, learner_l, learner_m, learner_g, smpls, fit_g, l_params, m_params, g_params
+ )
- theta, se = plr_dml2(y, x, d, l_hat, m_hat, g_hat,
- smpls, score)
+ theta, se = plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score)
return l_hat, m_hat, g_hat, theta.item(), se.item()
-def fit_nuisance_plr(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True,
- l_params=None, m_params=None, g_params=None):
+def fit_nuisance_plr(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, l_params=None, m_params=None, g_params=None):
ml_l = clone(learner_l)
l_hat = fit_predict(y, x, ml_l, l_params, smpls)
@@ -126,15 +140,16 @@ def fit_nuisance_plr(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
ml_g = clone(learner_g)
- g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls)
+ g_hat = fit_predict(y - theta_initial * d, x, ml_g, g_params, smpls)
else:
g_hat = []
return l_hat, m_hat, g_hat
-def fit_nuisance_plr_classifier(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True,
- l_params=None, m_params=None, g_params=None):
+def fit_nuisance_plr_classifier(
+ y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, l_params=None, m_params=None, g_params=None
+):
ml_l = clone(learner_l)
l_hat = fit_predict(y, x, ml_l, l_params, smpls)
@@ -148,7 +163,7 @@ def fit_nuisance_plr_classifier(y, x, d, learner_l, learner_m, learner_g, smpls,
theta_initial = -np.mean(psi_b) / np.mean(psi_a)
ml_g = clone(learner_g)
- g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls)
+ g_hat = fit_predict(y - theta_initial * d, x, ml_g, g_params, smpls)
else:
g_hat = []
@@ -170,7 +185,7 @@ def tune_nuisance_plr(y, x, d, ml_l, ml_m, ml_g, smpls, n_folds_tune, param_grid
psi_b = np.multiply(d - m_hat, y - l_hat)
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
- g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune)
+ g_tune_res = tune_grid_search(y - theta_initial * d, x, ml_g, smpls, param_grid_g, n_folds_tune)
g_best_params = [xx.best_params_ for xx in g_tune_res]
else:
g_best_params = []
@@ -182,9 +197,9 @@ def tune_nuisance_plr(y, x, d, ml_l, ml_m, ml_g, smpls, n_folds_tune, param_grid
def compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls):
- y_minus_l_hat = np.full_like(y, np.nan, dtype='float64')
- d_minus_m_hat = np.full_like(d, np.nan, dtype='float64')
- y_minus_g_hat = np.full_like(y, np.nan, dtype='float64')
+ y_minus_l_hat = np.full_like(y, np.nan, dtype="float64")
+ d_minus_m_hat = np.full_like(d, np.nan, dtype="float64")
+ y_minus_g_hat = np.full_like(y, np.nan, dtype="float64")
for idx, (_, test_index) in enumerate(smpls):
y_minus_l_hat[test_index] = y[test_index] - l_hat[idx]
if len(g_hat) > 0:
@@ -203,30 +218,52 @@ def plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score):
def var_plr(theta, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs):
- if score == 'partialling out':
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat)), 2) * \
- np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_m_hat*theta, d_minus_m_hat), 2))
+ if score == "partialling out":
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat)), 2)
+ * np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_m_hat * theta, d_minus_m_hat), 2))
+ )
else:
- assert score == 'IV-type'
- var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d)), 2) * \
- np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, d_minus_m_hat), 2))
+ assert score == "IV-type"
+ var = (
+ 1
+ / n_obs
+ * 1
+ / np.power(np.mean(np.multiply(d_minus_m_hat, d)), 2)
+ * np.mean(np.power(np.multiply(y_minus_g_hat - d * theta, d_minus_m_hat), 2))
+ )
return var
def plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score):
- if score == 'IV-type':
- res = np.mean(np.multiply(d_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(d_minus_m_hat, d))
+ if score == "IV-type":
+ res = np.mean(np.multiply(d_minus_m_hat, y_minus_g_hat)) / np.mean(np.multiply(d_minus_m_hat, d))
else:
- assert score == 'partialling out'
+ assert score == "partialling out"
res = scipy.linalg.lstsq(d_minus_m_hat.reshape(-1, 1), y_minus_l_hat)[0]
return res
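# The partialling-out branch of plr_orth above is just OLS of the outcome
# residual on the treatment residual; lstsq and the moment ratio agree, as
# this toy check illustrates.
import numpy as np
import scipy.linalg

rng = np.random.default_rng(11)
n = 10_000
d_minus_m_hat = rng.normal(size=n)
y_minus_l_hat = 0.5 * d_minus_m_hat + rng.normal(size=n)
theta_lstsq = scipy.linalg.lstsq(d_minus_m_hat.reshape(-1, 1), y_minus_l_hat)[0].item()
theta_ratio = np.mean(np.multiply(d_minus_m_hat, y_minus_l_hat)) / np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat))
# theta_lstsq and theta_ratio coincide up to floating point error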
-def boot_plr(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, apply_cross_fitting=True):
+def boot_plr(
+ y,
+ d,
+ thetas,
+ ses,
+ all_l_hat,
+ all_m_hat,
+ all_g_hat,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ apply_cross_fitting=True,
+):
all_boot_t_stat = list()
for i_rep in range(n_rep):
smpls = all_smpls[i_rep]
@@ -238,9 +275,19 @@ def boot_plr(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
weights = draw_weights(bootstrap, n_rep_boot, n_obs)
boot_t_stat = boot_plr_single_split(
- thetas[i_rep], y, d, all_l_hat[i_rep], all_m_hat[i_rep], all_g_hat[i_rep], smpls,
- score, ses[i_rep],
- weights, n_rep_boot, apply_cross_fitting)
+ thetas[i_rep],
+ y,
+ d,
+ all_l_hat[i_rep],
+ all_m_hat[i_rep],
+ all_g_hat[i_rep],
+ smpls,
+ score,
+ ses[i_rep],
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ )
all_boot_t_stat.append(boot_t_stat)
    # handled differently for plr because of n_rep_boot and multiple treatments
@@ -249,9 +296,21 @@ def boot_plr(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
return boot_t_stat
-def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
- all_smpls, score, bootstrap, n_rep_boot,
- n_rep=1, apply_cross_fitting=True):
+def boot_plr_multitreat(
+ y,
+ d,
+ thetas,
+ ses,
+ all_l_hat,
+ all_m_hat,
+ all_g_hat,
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep=1,
+ apply_cross_fitting=True,
+):
n_d = d.shape[1]
all_boot_t_stat = list()
for i_rep in range(n_rep):
@@ -266,10 +325,19 @@ def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
boot_t_stat = np.full((n_d, n_rep_boot), np.nan)
for i_d in range(n_d):
boot_t_stat[i_d, :] = boot_plr_single_split(
- thetas[i_rep][i_d], y, d[:, i_d],
- all_l_hat[i_rep][i_d], all_m_hat[i_rep][i_d], all_g_hat[i_rep][i_d],
- smpls, score, ses[i_rep][i_d],
- weights, n_rep_boot, apply_cross_fitting)
+ thetas[i_rep][i_d],
+ y,
+ d[:, i_d],
+ all_l_hat[i_rep][i_d],
+ all_m_hat[i_rep][i_d],
+ all_g_hat[i_rep][i_d],
+ smpls,
+ score,
+ ses[i_rep][i_d],
+ weights,
+ n_rep_boot,
+ apply_cross_fitting,
+ )
# transpose for shape (n_rep_boot, n_d)
boot_t_stat = np.transpose(boot_t_stat)
@@ -281,28 +349,27 @@ def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat,
return boot_t_stat
-def boot_plr_single_split(theta, y, d, l_hat, m_hat, g_hat,
- smpls, score, se, weights, n_rep, apply_cross_fitting):
+def boot_plr_single_split(theta, y, d, l_hat, m_hat, g_hat, smpls, score, se, weights, n_rep, apply_cross_fitting):
y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls)
if apply_cross_fitting:
- if score == 'partialling out':
+ if score == "partialling out":
J = np.mean(-np.multiply(d_minus_m_hat, d_minus_m_hat))
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
J = np.mean(-np.multiply(d_minus_m_hat, d))
else:
test_index = smpls[0][1]
- if score == 'partialling out':
+ if score == "partialling out":
J = np.mean(-np.multiply(d_minus_m_hat[test_index], d_minus_m_hat[test_index]))
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
J = np.mean(-np.multiply(d_minus_m_hat[test_index], d[test_index]))
- if score == 'partialling out':
+ if score == "partialling out":
psi = np.multiply(y_minus_l_hat - d_minus_m_hat * theta, d_minus_m_hat)
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
psi = np.multiply(y_minus_g_hat - d * theta, d_minus_m_hat)
boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting)
@@ -322,25 +389,23 @@ def fit_sensitivity_elements_plr(y, d, all_coef, predictions, score, n_rep):
for i_rep in range(n_rep):
for i_treat in range(n_treat):
d_tilde = d[:, i_treat]
- m_hat = predictions['ml_m'][:, i_rep, i_treat]
+ m_hat = predictions["ml_m"][:, i_rep, i_treat]
theta = all_coef[i_treat, i_rep]
- if score == 'partialling out':
- l_hat = predictions['ml_l'][:, i_rep, i_treat]
- sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d_tilde-m_hat))
+ if score == "partialling out":
+ l_hat = predictions["ml_l"][:, i_rep, i_treat]
+ sigma2_score_element = np.square(y - l_hat - np.multiply(theta, d_tilde - m_hat))
else:
- assert score == 'IV-type'
- g_hat = predictions['ml_g'][:, i_rep, i_treat]
+ assert score == "IV-type"
+ g_hat = predictions["ml_g"][:, i_rep, i_treat]
sigma2_score_element = np.square(y - g_hat - np.multiply(theta, d_tilde))
sigma2[0, i_rep, i_treat] = np.mean(sigma2_score_element)
psi_sigma2[:, i_rep, i_treat] = sigma2_score_element - sigma2[0, i_rep, i_treat]
- nu2[0, i_rep, i_treat] = np.divide(1.0, np.mean(np.square(d_tilde-m_hat)))
- psi_nu2[:, i_rep, i_treat] = nu2[0, i_rep, i_treat] - \
- np.multiply(np.square(d_tilde-m_hat), np.square(nu2[0, i_rep, i_treat]))
+ nu2[0, i_rep, i_treat] = np.divide(1.0, np.mean(np.square(d_tilde - m_hat)))
+ psi_nu2[:, i_rep, i_treat] = nu2[0, i_rep, i_treat] - np.multiply(
+ np.square(d_tilde - m_hat), np.square(nu2[0, i_rep, i_treat])
+ )
- element_dict = {'sigma2': sigma2,
- 'nu2': nu2,
- 'psi_sigma2': psi_sigma2,
- 'psi_nu2': psi_nu2}
+ element_dict = {"sigma2": sigma2, "nu2": nu2, "psi_sigma2": psi_sigma2, "psi_nu2": psi_nu2}
return element_dict
diff --git a/doubleml/plm/tests/conftest.py b/doubleml/plm/tests/conftest.py
index 925179e14..497d6fc9d 100644
--- a/doubleml/plm/tests/conftest.py
+++ b/doubleml/plm/tests/conftest.py
@@ -1,18 +1,17 @@
import numpy as np
import pandas as pd
-
import pytest
from scipy.linalg import toeplitz
-
from sklearn.datasets import make_spd_matrix
-from doubleml.datasets import make_plr_turrell2018, make_pliv_CHS2015
+
+from doubleml.datasets import make_pliv_CHS2015, make_plr_turrell2018
def _g(x):
return np.power(np.sin(x), 2)
-def _m(x, nu=0., gamma=1.):
+def _m(x, nu=0.0, gamma=1.0):
return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu))
@@ -20,10 +19,7 @@ def _m2(x):
return np.power(x, 2)
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20),
- (1000, 100)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)])
def generate_data1(request):
n_p = request.param
np.random.seed(1111)
@@ -38,8 +34,7 @@ def generate_data1(request):
return data
-@pytest.fixture(scope='session',
- params=[(500, 20)])
+@pytest.fixture(scope="session", params=[(500, 20)])
def generate_data2(request):
n_p = request.param
np.random.seed(1111)
@@ -54,8 +49,7 @@ def generate_data2(request):
return data
-@pytest.fixture(scope='session',
- params=[(1000, 20)])
+@pytest.fixture(scope="session", params=[(1000, 20)])
def generate_data_bivariate(request):
n_p = request.param
np.random.seed(1111)
@@ -67,24 +61,44 @@ def generate_data_bivariate(request):
sigma = make_spd_matrix(p)
# generating data
- x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
+ x = np.random.multivariate_normal(
+ np.zeros(p),
+ sigma,
+ size=[
+ n,
+ ],
+ )
G = _g(np.dot(x, b))
M0 = _m(np.dot(x, b))
M1 = _m2(np.dot(x, b))
- D0 = M0 + np.random.standard_normal(size=[n, ])
- D1 = M1 + np.random.standard_normal(size=[n, ])
- y = theta[0] * D0 + theta[1] * D1 + G + np.random.standard_normal(size=[n, ])
+ D0 = M0 + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ D1 = M1 + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ y = (
+ theta[0] * D0
+ + theta[1] * D1
+ + G
+ + np.random.standard_normal(
+ size=[
+ n,
+ ]
+ )
+ )
d = np.column_stack((D0, D1))
- column_names = [f'X{i + 1}' for i in np.arange(p)] + ['y'] + \
- [f'd{i + 1}' for i in np.arange(2)]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=column_names)
+ column_names = [f"X{i + 1}" for i in np.arange(p)] + ["y"] + [f"d{i + 1}" for i in np.arange(2)]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=column_names)
return data
-@pytest.fixture(scope='session',
- params=[(1000, 20)])
+@pytest.fixture(scope="session", params=[(1000, 20)])
def generate_data_toeplitz(request, betamax=4, decay=0.99, threshold=0, noisevar=10):
n_p = request.param
np.random.seed(3141)
@@ -101,20 +115,29 @@ def generate_data_toeplitz(request, betamax=4, decay=0.99, threshold=0, noisevar
mu = np.zeros(p)
# generating data
- x = np.random.multivariate_normal(mu, sigma, size=[n, ])
- y = np.dot(x, beta) + np.random.normal(loc=0.0, scale=np.sqrt(noisevar), size=[n, ])
+ x = np.random.multivariate_normal(
+ mu,
+ sigma,
+ size=[
+ n,
+ ],
+ )
+ y = np.dot(x, beta) + np.random.normal(
+ loc=0.0,
+ scale=np.sqrt(noisevar),
+ size=[
+ n,
+ ],
+ )
d = x[:, cols_treatment]
x = np.delete(x, cols_treatment, axis=1)
- column_names = [f'X{i + 1}' for i in np.arange(x.shape[1])] + \
- ['y'] + [f'd{i + 1}' for i in np.arange(len(cols_treatment))]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=column_names)
+ column_names = [f"X{i + 1}" for i in np.arange(x.shape[1])] + ["y"] + [f"d{i + 1}" for i in np.arange(len(cols_treatment))]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=column_names)
return data
-@pytest.fixture(scope='session',
- params=[(1000, 20)])
+@pytest.fixture(scope="session", params=[(1000, 20)])
def generate_data_iv(request):
n_p = request.param
np.random.seed(1111)
@@ -129,14 +152,13 @@ def generate_data_iv(request):
return data
-@pytest.fixture(scope='session',
- params=[500])
+@pytest.fixture(scope="session", params=[500])
def generate_data_pliv_partialXZ(request):
n_p = request.param
np.random.seed(1111)
# setting parameters
n = n_p
- theta = 1.
+ theta = 1.0
# generating data
data = make_pliv_CHS2015(n, alpha=theta)
@@ -144,14 +166,13 @@ def generate_data_pliv_partialXZ(request):
return data
-@pytest.fixture(scope='session',
- params=[500])
+@pytest.fixture(scope="session", params=[500])
def generate_data_pliv_partialX(request):
n_p = request.param
np.random.seed(1111)
# setting parameters
n = n_p
- theta = 1.
+ theta = 1.0
# generating data
data = make_pliv_CHS2015(n, alpha=theta, dim_z=5)
@@ -159,14 +180,13 @@ def generate_data_pliv_partialX(request):
return data
-@pytest.fixture(scope='session',
- params=[500])
+@pytest.fixture(scope="session", params=[500])
def generate_data_pliv_partialZ(request):
n_p = request.param
np.random.seed(1111)
# setting parameters
n = n_p
- theta = 1.
+ theta = 1.0
# generating data
data = make_data_pliv_partialZ(n, alpha=theta, dim_x=5)
@@ -174,26 +194,38 @@ def generate_data_pliv_partialZ(request):
return data
-def make_data_pliv_partialZ(n_obs, alpha=1., dim_x=5, dim_z=150):
- xx = np.random.multivariate_normal(np.zeros(2),
- np.array([[1., 0.6], [0.6, 1.]]),
- size=[n_obs, ])
+def make_data_pliv_partialZ(n_obs, alpha=1.0, dim_x=5, dim_z=150):
+ xx = np.random.multivariate_normal(
+ np.zeros(2),
+ np.array([[1.0, 0.6], [0.6, 1.0]]),
+ size=[
+ n_obs,
+ ],
+ )
epsilon = xx[:, 0]
u = xx[:, 1]
sigma = toeplitz([np.power(0.5, k) for k in range(1, dim_x + 1)])
- x = np.random.multivariate_normal(np.zeros(dim_x),
- sigma,
- size=[n_obs, ])
+ x = np.random.multivariate_normal(
+ np.zeros(dim_x),
+ sigma,
+ size=[
+ n_obs,
+ ],
+ )
I_z = np.eye(dim_z)
- xi = np.random.multivariate_normal(np.zeros(dim_z),
- 0.25 * I_z,
- size=[n_obs, ])
-
- beta = [1 / (k ** 2) for k in range(1, dim_x + 1)]
+ xi = np.random.multivariate_normal(
+ np.zeros(dim_z),
+ 0.25 * I_z,
+ size=[
+ n_obs,
+ ],
+ )
+
+ beta = [1 / (k**2) for k in range(1, dim_x + 1)]
gamma = beta
- delta = [1 / (k ** 2) for k in range(1, dim_z + 1)]
+ delta = [1 / (k**2) for k in range(1, dim_z + 1)]
I_x = np.eye(dim_x)
Pi = np.hstack((I_x, np.zeros((dim_x, dim_z - dim_x))))
@@ -202,9 +234,8 @@ def make_data_pliv_partialZ(n_obs, alpha=1., dim_x=5, dim_z=150):
d = np.dot(x, gamma) + np.dot(z, delta) + u
y = alpha * d + np.dot(x, beta) + epsilon
- x_cols = [f'X{i + 1}' for i in np.arange(dim_x)]
- z_cols = [f'Z{i + 1}' for i in np.arange(dim_z)]
- data = pd.DataFrame(np.column_stack((x, y, d, z)),
- columns=x_cols + ['y', 'd'] + z_cols)
+ x_cols = [f"X{i + 1}" for i in np.arange(dim_x)]
+ z_cols = [f"Z{i + 1}" for i in np.arange(dim_z)]
+ data = pd.DataFrame(np.column_stack((x, y, d, z)), columns=x_cols + ["y", "d"] + z_cols)
return data
diff --git a/doubleml/plm/tests/test_pliv.py b/doubleml/plm/tests/test_pliv.py
index ce640aadb..7ee248f25 100644
--- a/doubleml/plm/tests/test_pliv.py
+++ b/doubleml/plm/tests/test_pliv.py
@@ -1,41 +1,38 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso, LinearRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_manual import fit_pliv, boot_pliv
+from ._utils_pliv_manual import boot_pliv, fit_pliv
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=2, n_estimators=10),
- LinearRegression(),
- Lasso(alpha=0.1)])
+@pytest.fixture(
+ scope="module", params=[RandomForestRegressor(max_depth=2, n_estimators=10), LinearRegression(), Lasso(alpha=0.1)]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out', 'IV-type'])
+@pytest.fixture(scope="module", params=["partialling out", "IV-type"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_fixture(generate_data_iv, learner, score):
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
# collect data
data = generate_data_iv
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m, r & g
ml_l = clone(learner)
@@ -44,71 +41,75 @@ def dml_pliv_fixture(generate_data_iv, learner, score):
ml_g = clone(learner)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
- if score == 'partialling out':
- dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
- ml_l, ml_m, ml_r,
- n_folds=n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, "Z1")
+ if score == "partialling out":
+ dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, score=score)
dml_pliv_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
- z = data['Z1'].values
+ d = data["d"].values
+ z = data["Z1"].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_pliv(y, x, d, z,
- clone(learner), clone(learner), clone(learner), clone(learner),
- all_smpls, score)
+ res_manual = fit_pliv(y, x, d, z, clone(learner), clone(learner), clone(learner), clone(learner), all_smpls, score)
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
- res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_pliv_coef(dml_pliv_fixture):
- assert math.isclose(dml_pliv_fixture['coef'],
- dml_pliv_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_fixture["coef"], dml_pliv_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pliv_se(dml_pliv_fixture):
- assert math.isclose(dml_pliv_fixture['se'],
- dml_pliv_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_fixture["se"], dml_pliv_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pliv_boot(dml_pliv_fixture):
- for bootstrap in dml_pliv_fixture['boot_methods']:
- assert np.allclose(dml_pliv_fixture['boot_t_stat' + bootstrap],
- dml_pliv_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_pliv_external_predictions.py b/doubleml/plm/tests/test_pliv_external_predictions.py
index 4b53f6f70..bc8a1e8a4 100644
--- a/doubleml/plm/tests/test_pliv_external_predictions.py
+++ b/doubleml/plm/tests/test_pliv_external_predictions.py
@@ -1,8 +1,10 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression
-from doubleml import DoubleMLPLIV, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLPLIV
from doubleml.datasets import make_pliv_CHS2015
from doubleml.utils import DMLDummyRegressor
@@ -31,9 +33,7 @@ def adapted_doubleml_fixture(score, n_rep, dim_z):
else:
ext_predictions = {"d": {}}
- data = make_pliv_CHS2015(
- n_obs=500, dim_x=20, alpha=0.5, dim_z=dim_z, return_type="DataFrame"
- )
+ data = make_pliv_CHS2015(n_obs=500, dim_x=20, alpha=0.5, dim_z=dim_z, return_type="DataFrame")
np.random.seed(3141)
@@ -72,9 +72,7 @@ def adapted_doubleml_fixture(score, n_rep, dim_z):
ml_m_key = "ml_m_" + "Z" + str(instr + 1)
ext_predictions["d"][ml_m_key] = dml_pliv.predictions[ml_m_key][:, :, 0]
- dml_pliv_ext = DoubleMLPLIV(
- ml_m=DMLDummyRegressor(), ml_l=DMLDummyRegressor(), ml_r=DMLDummyRegressor(), **kwargs
- )
+ dml_pliv_ext = DoubleMLPLIV(ml_m=DMLDummyRegressor(), ml_l=DMLDummyRegressor(), ml_r=DMLDummyRegressor(), **kwargs)
np.random.seed(3141)
dml_pliv_ext.fit(external_predictions=ext_predictions)
diff --git a/doubleml/plm/tests/test_pliv_partial_x.py b/doubleml/plm/tests/test_pliv_partial_x.py
index d8593500b..7fa922402 100644
--- a/doubleml/plm/tests/test_pliv_partial_x.py
+++ b/doubleml/plm/tests/test_pliv_partial_x.py
@@ -1,32 +1,29 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.linear_model import Lasso
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_x_manual import fit_pliv_partial_x, boot_pliv_partial_x
+from ._utils_pliv_partial_x_manual import boot_pliv_partial_x, fit_pliv_partial_x
-@pytest.fixture(scope='module',
- params=[Lasso(alpha=0.1)])
+@pytest.fixture(scope="module", params=[Lasso(alpha=0.1)])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner, score):
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
@@ -39,9 +36,7 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner, score):
ml_r = clone(learner)
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data,
- ml_l, ml_m, ml_r,
- n_folds=n_folds)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data, ml_l, ml_m, ml_r, n_folds=n_folds)
dml_pliv_obj.fit(store_predictions=True)
@@ -53,46 +48,57 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner, score):
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_pliv_partial_x(y, x, d, z,
- clone(learner), clone(learner), clone(learner),
- all_smpls, score)
+ res_manual = fit_pliv_partial_x(y, x, d, z, clone(learner), clone(learner), clone(learner), all_smpls, score)
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_x(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_x(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_x_fixture):
- assert math.isclose(dml_pliv_partial_x_fixture['coef'],
- dml_pliv_partial_x_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_x_fixture["coef"], dml_pliv_partial_x_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_x_fixture):
- assert math.isclose(dml_pliv_partial_x_fixture['se'],
- dml_pliv_partial_x_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_partial_x_fixture["se"], dml_pliv_partial_x_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
def test_dml_pliv_boot(dml_pliv_partial_x_fixture):
- for bootstrap in dml_pliv_partial_x_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_x_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_x_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_x_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_x_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_x_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_pliv_partial_x_tune.py b/doubleml/plm/tests/test_pliv_partial_x_tune.py
index bd91f60c0..2526c9e45 100644
--- a/doubleml/plm/tests/test_pliv_partial_x_tune.py
+++ b/doubleml/plm/tests/test_pliv_partial_x_tune.py
@@ -1,66 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import ElasticNet
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_x_manual import fit_pliv_partial_x, boot_pliv_partial_x, tune_nuisance_pliv_partial_x
+from ._utils_pliv_partial_x_manual import boot_pliv_partial_x, fit_pliv_partial_x, tune_nuisance_pliv_partial_x
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_l(request):
return request.param
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_r(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ == RandomForestRegressor:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ == ElasticNet
- par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}
+ par_grid = {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}
return par_grid
-@pytest.fixture(scope='module')
-def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_l, learner_m, learner_r, score,
- tune_on_folds):
- par_grid = {'ml_l': get_par_grid(learner_l),
- 'ml_m': get_par_grid(learner_m),
- 'ml_r': get_par_grid(learner_r)}
+@pytest.fixture(scope="module")
+def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_l, learner_m, learner_r, score, tune_on_folds):
+ par_grid = {"ml_l": get_par_grid(learner_l), "ml_m": get_par_grid(learner_m), "ml_r": get_par_grid(learner_r)}
n_folds_tune = 4
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
@@ -73,9 +64,7 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_l, learner_m
ml_r = clone(learner_r)
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data,
- ml_l, ml_m, ml_r,
- n_folds=n_folds)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data, ml_l, ml_m, ml_r, n_folds=n_folds)
# tune hyperparameters
_ = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)
@@ -92,68 +81,103 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_l, learner_m
smpls = all_smpls[0]
if tune_on_folds:
- l_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
- clone(learner_l),
- clone(learner_m),
- clone(learner_r),
- smpls, n_folds_tune,
- par_grid['ml_l'],
- par_grid['ml_m'],
- par_grid['ml_r'])
+ l_params, m_params, r_params = tune_nuisance_pliv_partial_x(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ smpls,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- l_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
- clone(learner_l),
- clone(learner_m),
- clone(learner_r),
- xx, n_folds_tune,
- par_grid['ml_l'],
- par_grid['ml_m'],
- par_grid['ml_r'])
+ l_params, m_params, r_params = tune_nuisance_pliv_partial_x(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ xx,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ )
l_params = l_params * n_folds
m_params = [xx * n_folds for xx in m_params]
r_params = r_params * n_folds
- res_manual = fit_pliv_partial_x(y, x, d, z,
- clone(learner_l), clone(learner_m), clone(learner_r),
- all_smpls, score,
- l_params=l_params, m_params=m_params, r_params=r_params)
-
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_pliv_partial_x(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ all_smpls,
+ score,
+ l_params=l_params,
+ m_params=m_params,
+ r_params=r_params,
+ )
+
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_x(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_x(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_x_fixture):
- assert math.isclose(dml_pliv_partial_x_fixture['coef'],
- dml_pliv_partial_x_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_x_fixture["coef"], dml_pliv_partial_x_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_x_fixture):
- assert math.isclose(dml_pliv_partial_x_fixture['se'],
- dml_pliv_partial_x_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_partial_x_fixture["se"], dml_pliv_partial_x_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
def test_dml_pliv_boot(dml_pliv_partial_x_fixture):
- for bootstrap in dml_pliv_partial_x_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_x_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_x_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_x_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_x_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_x_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
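The tuning tests stack several @pytest.fixture(scope="module", params=...) fixtures, so pytest runs the expensive fixture once per parameter combination and the coef/se/boot assertions all reuse that result. A minimal self-contained illustration of how the parameters multiply (fixture and test names here are invented):

    import pytest

    @pytest.fixture(scope="module", params=["lasso", "elasticnet"])
    def learner(request):
        return request.param

    @pytest.fixture(scope="module", params=[True, False])
    def tune_on_folds(request):
        return request.param

    # pytest forms the cross product: this test runs 2 x 2 = 4 times, while
    # each (learner, tune_on_folds) pair is instantiated once per module.
    def test_combinations(learner, tune_on_folds):
        assert learner in ("lasso", "elasticnet")
        assert tune_on_folds in (True, False)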
diff --git a/doubleml/plm/tests/test_pliv_partial_xz.py b/doubleml/plm/tests/test_pliv_partial_xz.py
index 4b14eeb05..8eb357e44 100644
--- a/doubleml/plm/tests/test_pliv_partial_xz.py
+++ b/doubleml/plm/tests/test_pliv_partial_xz.py
@@ -1,32 +1,29 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.linear_model import Lasso
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_xz_manual import fit_pliv_partial_xz, boot_pliv_partial_xz
+from ._utils_pliv_partial_xz_manual import boot_pliv_partial_xz, fit_pliv_partial_xz
-@pytest.fixture(scope='module',
- params=[Lasso(alpha=0.1)])
+@pytest.fixture(scope="module", params=[Lasso(alpha=0.1)])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner, score):
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
@@ -39,9 +36,7 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner, score):
ml_r = clone(learner)
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data,
- ml_l, ml_m, ml_r,
- n_folds=n_folds)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data, ml_l, ml_m, ml_r, n_folds=n_folds)
dml_pliv_obj.fit()
@@ -53,45 +48,58 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner, score):
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_pliv_partial_xz(y, x, d, z,
- clone(learner), clone(learner), clone(learner),
- all_smpls, score)
+ res_manual = fit_pliv_partial_xz(y, x, d, z, clone(learner), clone(learner), clone(learner), all_smpls, score)
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_xz(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_xz(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_xz_fixture):
- assert math.isclose(dml_pliv_partial_xz_fixture['coef'],
- dml_pliv_partial_xz_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_xz_fixture["coef"], dml_pliv_partial_xz_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_xz_fixture):
- assert math.isclose(dml_pliv_partial_xz_fixture['se'],
- dml_pliv_partial_xz_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_xz_fixture["se"], dml_pliv_partial_xz_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_boot(dml_pliv_partial_xz_fixture):
- for bootstrap in dml_pliv_partial_xz_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_xz_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_xz_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_xz_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_xz_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_xz_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_pliv_partial_xz_tune.py b/doubleml/plm/tests/test_pliv_partial_xz_tune.py
index 68c6d1bef..48f845613 100644
--- a/doubleml/plm/tests/test_pliv_partial_xz_tune.py
+++ b/doubleml/plm/tests/test_pliv_partial_xz_tune.py
@@ -1,66 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import ElasticNet
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_xz_manual import fit_pliv_partial_xz, boot_pliv_partial_xz, tune_nuisance_pliv_partial_xz
+from ._utils_pliv_partial_xz_manual import boot_pliv_partial_xz, fit_pliv_partial_xz, tune_nuisance_pliv_partial_xz
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_l(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor()])
+@pytest.fixture(scope="module", params=[RandomForestRegressor()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor()])
+@pytest.fixture(scope="module", params=[RandomForestRegressor()])
def learner_r(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ == RandomForestRegressor:
- par_grid = {'n_estimators': [5, 10, 20]}
+ par_grid = {"n_estimators": [5, 10, 20]}
else:
assert learner.__class__ == ElasticNet
- par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}
+ par_grid = {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}
return par_grid
-@pytest.fixture(scope='module')
-def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_l, learner_m, learner_r, score,
- tune_on_folds):
- par_grid = {'ml_l': get_par_grid(learner_l),
- 'ml_m': get_par_grid(learner_m),
- 'ml_r': get_par_grid(learner_r)}
+@pytest.fixture(scope="module")
+def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_l, learner_m, learner_r, score, tune_on_folds):
+ par_grid = {"ml_l": get_par_grid(learner_l), "ml_m": get_par_grid(learner_m), "ml_r": get_par_grid(learner_r)}
n_folds_tune = 4
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
@@ -73,9 +64,7 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_l, learner
ml_r = clone(learner_r)
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data,
- ml_l, ml_m, ml_r,
- n_folds=n_folds)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data, ml_l, ml_m, ml_r, n_folds=n_folds)
# tune hyperparameters
_ = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)
@@ -92,68 +81,105 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_l, learner
smpls = all_smpls[0]
if tune_on_folds:
- l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
- clone(learner_l),
- clone(learner_m),
- clone(learner_r),
- smpls, n_folds_tune,
- par_grid['ml_l'],
- par_grid['ml_m'],
- par_grid['ml_r'])
+ l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ smpls,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ )
else:
xx = [(np.arange(len(y)), np.arange(len(y)))]
- l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
- clone(learner_l),
- clone(learner_m),
- clone(learner_r),
- xx, n_folds_tune,
- par_grid['ml_l'],
- par_grid['ml_m'],
- par_grid['ml_r'])
+ l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ xx,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ )
l_params = l_params * n_folds
m_params = m_params * n_folds
r_params = r_params * n_folds
- res_manual = fit_pliv_partial_xz(y, x, d, z,
- clone(learner_l), clone(learner_m), clone(learner_r),
- all_smpls, score,
- l_params=l_params, m_params=m_params, r_params=r_params)
-
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_pliv_partial_xz(
+ y,
+ x,
+ d,
+ z,
+ clone(learner_l),
+ clone(learner_m),
+ clone(learner_r),
+ all_smpls,
+ score,
+ l_params=l_params,
+ m_params=m_params,
+ r_params=r_params,
+ )
+
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_xz(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_xz(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_xz_fixture):
- assert math.isclose(dml_pliv_partial_xz_fixture['coef'],
- dml_pliv_partial_xz_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_xz_fixture["coef"], dml_pliv_partial_xz_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_xz_fixture):
- assert math.isclose(dml_pliv_partial_xz_fixture['se'],
- dml_pliv_partial_xz_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_xz_fixture["se"], dml_pliv_partial_xz_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_boot(dml_pliv_partial_xz_fixture):
- for bootstrap in dml_pliv_partial_xz_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_xz_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_xz_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_xz_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_xz_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_xz_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
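Each comparison above reseeds with np.random.seed(3141) immediately before both the manual bootstrap and the package bootstrap. The point of the double reseed is that the two implementations then draw identical bootstrap weights, making the test exact up to floating-point tolerance rather than statistical; a tiny sketch:

    import numpy as np

    np.random.seed(3141)
    a = np.random.normal(size=3)
    np.random.seed(3141)
    b = np.random.normal(size=3)
    # Identical seeds, identical draws: the manual and package bootstrap
    # runs consume the same random stream.
    assert np.array_equal(a, b)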
diff --git a/doubleml/plm/tests/test_pliv_partial_z.py b/doubleml/plm/tests/test_pliv_partial_z.py
index b4b2af639..05157088b 100644
--- a/doubleml/plm/tests/test_pliv_partial_z.py
+++ b/doubleml/plm/tests/test_pliv_partial_z.py
@@ -1,97 +1,93 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.linear_model import Lasso
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_z_manual import fit_pliv_partial_z, boot_pliv_partial_z
+from ._utils_pliv_partial_z_manual import boot_pliv_partial_z, fit_pliv_partial_z
-@pytest.fixture(scope='module',
- params=[Lasso(alpha=0.1)])
+@pytest.fixture(scope="module", params=[Lasso(alpha=0.1)])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner, score):
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
# collect data
data = generate_data_pliv_partialZ
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
- z_cols = data.columns[data.columns.str.startswith('Z')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+ z_cols = data.columns[data.columns.str.startswith("Z")].tolist()
# Set machine learning methods for r
ml_r = clone(learner)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, z_cols)
- dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data,
- ml_r,
- n_folds=n_folds)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, z_cols)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data, ml_r, n_folds=n_folds)
dml_pliv_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
+ d = data["d"].values
z = data.loc[:, z_cols].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_pliv_partial_z(y, x, d, z,
- clone(learner),
- all_smpls, score)
+ res_manual = fit_pliv_partial_z(y, x, d, z, clone(learner), all_smpls, score)
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_z(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_z(
+ y, d, z, res_manual["thetas"], res_manual["ses"], res_manual["all_r_hat"], all_smpls, score, bootstrap, n_rep_boot
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_z_fixture):
- assert math.isclose(dml_pliv_partial_z_fixture['coef'],
- dml_pliv_partial_z_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_z_fixture["coef"], dml_pliv_partial_z_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_z_fixture):
- assert math.isclose(dml_pliv_partial_z_fixture['se'],
- dml_pliv_partial_z_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_partial_z_fixture["se"], dml_pliv_partial_z_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
def test_dml_pliv_boot(dml_pliv_partial_z_fixture):
- for bootstrap in dml_pliv_partial_z_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_z_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_z_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_z_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_z_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_z_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
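The bootstrap assertions compare the manual result only after boot_t_stat.reshape(-1, 1, 1). That reshape is not cosmetic: the package stores its bootstrapped t-statistics as a three-dimensional array, and comparing the raw 1-D vector against it would broadcast into a full cross grid. A short numpy demonstration (n_rep_boot matches the value used above):

    import numpy as np

    n_rep_boot = 503
    manual = np.random.normal(size=n_rep_boot)  # 1-D manual bootstrap output
    package = manual.reshape(-1, 1, 1)          # (n_rep_boot, 1, 1) layout

    # Without the reshape, (503, 1, 1) vs (503,) would broadcast to a
    # (503, 1, 503) grid and silently compare every draw with every other.
    assert np.allclose(package, manual.reshape(-1, 1, 1), rtol=1e-9, atol=1e-4)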
diff --git a/doubleml/plm/tests/test_pliv_partial_z_tune.py b/doubleml/plm/tests/test_pliv_partial_z_tune.py
index a52d1da6c..1763d9c29 100644
--- a/doubleml/plm/tests/test_pliv_partial_z_tune.py
+++ b/doubleml/plm/tests/test_pliv_partial_z_tune.py
@@ -1,63 +1,57 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-
from sklearn.linear_model import ElasticNet
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_pliv_partial_z_manual import fit_pliv_partial_z, boot_pliv_partial_z, tune_nuisance_pliv_partial_z
+from ._utils_pliv_partial_z_manual import boot_pliv_partial_z, fit_pliv_partial_z, tune_nuisance_pliv_partial_z
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_r(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out'])
+@pytest.fixture(scope="module", params=["partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
assert learner.__class__ == ElasticNet
- par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}
+ par_grid = {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}
return par_grid
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner_r, score, tune_on_folds):
- par_grid = {'ml_r': get_par_grid(learner_r)}
+ par_grid = {"ml_r": get_par_grid(learner_r)}
n_folds_tune = 4
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
# collect data
data = generate_data_pliv_partialZ
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
- z_cols = data.columns[data.columns.str.startswith('Z')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+ z_cols = data.columns[data.columns.str.startswith("Z")].tolist()
# Set machine learning methods for r
ml_r = clone(learner_r)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, z_cols)
- dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data,
- ml_r,
- n_folds=n_folds)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, z_cols)
+ dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data, ml_r, n_folds=n_folds)
# tune hyperparameters
_ = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)
@@ -65,66 +59,60 @@ def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner_r, score, tu
dml_pliv_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
+ d = data["d"].values
z = data.loc[:, z_cols].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
smpls = all_smpls[0]
if tune_on_folds:
- r_params = tune_nuisance_pliv_partial_z(y, x, d, z,
- clone(learner_r),
- smpls, n_folds_tune,
- par_grid['ml_r'])
+ r_params = tune_nuisance_pliv_partial_z(y, x, d, z, clone(learner_r), smpls, n_folds_tune, par_grid["ml_r"])
else:
xx = [(np.arange(len(y)), np.array([]))]
- r_params = tune_nuisance_pliv_partial_z(y, x, d, z,
- clone(learner_r),
- xx, n_folds_tune,
- par_grid['ml_r'])
+ r_params = tune_nuisance_pliv_partial_z(y, x, d, z, clone(learner_r), xx, n_folds_tune, par_grid["ml_r"])
r_params = r_params * n_folds
- res_manual = fit_pliv_partial_z(y, x, d, z,
- clone(learner_r),
- all_smpls, score,
- r_params=r_params)
+ res_manual = fit_pliv_partial_z(y, x, d, z, clone(learner_r), all_smpls, score, r_params=r_params)
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv_partial_z(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_r_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv_partial_z(
+ y, d, z, res_manual["thetas"], res_manual["ses"], res_manual["all_r_hat"], all_smpls, score, bootstrap, n_rep_boot
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
def test_dml_pliv_coef(dml_pliv_partial_z_fixture):
- assert math.isclose(dml_pliv_partial_z_fixture['coef'],
- dml_pliv_partial_z_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_partial_z_fixture["coef"], dml_pliv_partial_z_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
def test_dml_pliv_se(dml_pliv_partial_z_fixture):
- assert math.isclose(dml_pliv_partial_z_fixture['se'],
- dml_pliv_partial_z_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_partial_z_fixture["se"], dml_pliv_partial_z_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
def test_dml_pliv_boot(dml_pliv_partial_z_fixture):
- for bootstrap in dml_pliv_partial_z_fixture['boot_methods']:
- assert np.allclose(dml_pliv_partial_z_fixture['boot_t_stat' + bootstrap],
- dml_pliv_partial_z_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_partial_z_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_partial_z_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_partial_z_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
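When tune_on_folds is False, the code above tunes once on a dummy split whose training set is the full sample and then replicates the tuned configuration across folds with r_params = r_params * n_folds. Both idioms in isolation (the parameter values are placeholders):

    import numpy as np

    n, n_folds = 100, 2

    # A single pseudo-fold covering all observations, with an empty test
    # set: the tuner therefore runs exactly once on the whole sample.
    full_sample_split = [(np.arange(n), np.array([]))]
    assert len(full_sample_split) == 1

    # Python list multiplication repeats the one tuned parameter set so
    # that every fold receives the same configuration.
    r_params = [{"alpha": 0.05, "l1_ratio": 0.5}] * n_folds
    assert r_params[0] is r_params[1]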
diff --git a/doubleml/plm/tests/test_pliv_tune.py b/doubleml/plm/tests/test_pliv_tune.py
index bcd69d322..9da095836 100644
--- a/doubleml/plm/tests/test_pliv_tune.py
+++ b/doubleml/plm/tests/test_pliv_tune.py
@@ -1,173 +1,211 @@
-import numpy as np
-import pytest
import math
-from sklearn.linear_model import Lasso, ElasticNet
+import numpy as np
+import pytest
+from sklearn.linear_model import ElasticNet, Lasso
import doubleml as dml
-from ...tests._utils import draw_smpls, _clone
-from ._utils_pliv_manual import fit_pliv, boot_pliv, tune_nuisance_pliv
+from ...tests._utils import _clone, draw_smpls
+from ._utils_pliv_manual import boot_pliv, fit_pliv, tune_nuisance_pliv
-@pytest.fixture(scope='module',
- params=[Lasso(),
- ElasticNet()])
+@pytest.fixture(scope="module", params=[Lasso(), ElasticNet()])
def learner_l(request):
return request.param
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_r(request):
return request.param
-@pytest.fixture(scope='module',
- params=[ElasticNet()])
+@pytest.fixture(scope="module", params=[ElasticNet()])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out', 'IV-type'])
+@pytest.fixture(scope="module", params=["partialling out", "IV-type"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ == Lasso:
- par_grid = {'alpha': np.linspace(0.05, .95, 7)}
+ par_grid = {"alpha": np.linspace(0.05, 0.95, 7)}
else:
assert learner.__class__ == ElasticNet
- par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}
+ par_grid = {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}
return par_grid
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_fixture(generate_data_iv, learner_l, learner_m, learner_r, learner_g, score, tune_on_folds):
- par_grid = {'ml_l': get_par_grid(learner_l),
- 'ml_m': get_par_grid(learner_m),
- 'ml_r': get_par_grid(learner_r),
- 'ml_g': get_par_grid(learner_g)}
+ par_grid = {
+ "ml_l": get_par_grid(learner_l),
+ "ml_m": get_par_grid(learner_m),
+ "ml_r": get_par_grid(learner_r),
+ "ml_g": get_par_grid(learner_g),
+ }
n_folds_tune = 4
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 503
# collect data
data = generate_data_iv
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m, r & g
ml_l = _clone(learner_l)
ml_m = _clone(learner_m)
ml_r = _clone(learner_r)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner_g)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
- dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols, "Z1")
+ dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, score=score)
# tune hyperparameters
- tune_res = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=False)
+ tune_res = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
assert isinstance(tune_res, dml.DoubleMLPLIV)
dml_pliv_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
- z = data['Z1'].values
+ d = data["d"].values
+ z = data["Z1"].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
smpls = all_smpls[0]
- tune_g = (score == 'IV-type') | callable(score)
+ tune_g = (score == "IV-type") | callable(score)
if tune_on_folds:
l_params, m_params, r_params, g_params = tune_nuisance_pliv(
- y, x, d, z,
- _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
- smpls, n_folds_tune,
- par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_r'], par_grid['ml_g'],
- tune_g)
+ y,
+ x,
+ d,
+ z,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_r),
+ _clone(learner_g),
+ smpls,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ par_grid["ml_g"],
+ tune_g,
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
l_params, m_params, r_params, g_params = tune_nuisance_pliv(
- y, x, d, z,
- _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
- xx, n_folds_tune,
- par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_r'], par_grid['ml_g'],
- tune_g)
+ y,
+ x,
+ d,
+ z,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_r),
+ _clone(learner_g),
+ xx,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_r"],
+ par_grid["ml_g"],
+ tune_g,
+ )
l_params = l_params * n_folds
m_params = m_params * n_folds
r_params = r_params * n_folds
g_params = g_params * n_folds
- res_manual = fit_pliv(y, x, d, z, _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
- all_smpls, score,
- l_params=l_params, m_params=m_params, r_params=r_params, g_params=g_params)
-
- res_dict = {'coef': dml_pliv_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_pliv_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_pliv(
+ y,
+ x,
+ d,
+ z,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_r),
+ _clone(learner_g),
+ all_smpls,
+ score,
+ l_params=l_params,
+ m_params=m_params,
+ r_params=r_params,
+ g_params=g_params,
+ )
+
+ res_dict = {
+ "coef": dml_pliv_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_pliv_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_pliv(y, d, z, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'],
- res_manual['all_r_hat'], res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_pliv(
+ y,
+ d,
+ z,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_r_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_pliv_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_pliv_coef(dml_pliv_fixture):
- assert math.isclose(dml_pliv_fixture['coef'],
- dml_pliv_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_fixture["coef"], dml_pliv_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pliv_se(dml_pliv_fixture):
- assert math.isclose(dml_pliv_fixture['se'],
- dml_pliv_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_pliv_fixture["se"], dml_pliv_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_pliv_boot(dml_pliv_fixture):
- for bootstrap in dml_pliv_fixture['boot_methods']:
- assert np.allclose(dml_pliv_fixture['boot_t_stat' + bootstrap],
- dml_pliv_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_pliv_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_pliv_fixture["boot_t_stat" + bootstrap],
+ dml_pliv_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
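get_par_grid in this file dispatches on learner.__class__ rather than isinstance. That is deliberate: in scikit-learn, Lasso is implemented as a subclass of ElasticNet, so an isinstance check could not distinguish the two. A standalone version of the helper:

    import numpy as np
    from sklearn.linear_model import ElasticNet, Lasso

    def get_par_grid(learner):
        # Exact class comparison: isinstance(Lasso(), ElasticNet) is True,
        # so only __class__ separates the two learners cleanly.
        if learner.__class__ == Lasso:
            return {"alpha": np.linspace(0.05, 0.95, 7)}
        assert learner.__class__ == ElasticNet
        return {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}

    print(get_par_grid(Lasso()))       # penalty strength only
    print(get_par_grid(ElasticNet()))  # mixing ratio x penalty strength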
diff --git a/doubleml/plm/tests/test_plr.py b/doubleml/plm/tests/test_plr.py
index d46b914c4..79f21f849 100644
--- a/doubleml/plm/tests/test_plr.py
+++ b/doubleml/plm/tests/test_plr.py
@@ -1,43 +1,40 @@
-import pytest
import math
-import scipy
+
import numpy as np
import pandas as pd
-
+import pytest
+import scipy
from sklearn.base import clone
-
-from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso, LinearRegression
import doubleml as dml
from ...tests._utils import draw_smpls
-from ._utils_plr_manual import fit_plr, plr_dml2, boot_plr, fit_sensitivity_elements_plr
+from ._utils_plr_manual import boot_plr, fit_plr, fit_sensitivity_elements_plr, plr_dml2
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=2, n_estimators=10),
- LinearRegression(),
- Lasso(alpha=0.1)])
+@pytest.fixture(
+ scope="module", params=[RandomForestRegressor(max_depth=2, n_estimators=10), LinearRegression(), Lasso(alpha=0.1)]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
@pytest.fixture(scope="module")
def dml_plr_fixture(generate_data1, learner, score):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 502
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for m & g
ml_l = clone(learner)
@@ -45,175 +42,170 @@ def dml_plr_fixture(generate_data1, learner, score):
ml_g = clone(learner)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
dml_plr_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
+ d = data["d"].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner),
- all_smpls, score)
+ res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, score)
np.random.seed(3141)
# test with external nuisance predictions
- if score == 'partialling out':
- dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
# synchronize the sample splitting
dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls)
- if score == 'partialling out':
- prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1),
- 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}}
+ if score == "partialling out":
+ prediction_dict = {
+ "d": {
+ "ml_l": dml_plr_obj.predictions["ml_l"].reshape(-1, 1),
+ "ml_m": dml_plr_obj.predictions["ml_m"].reshape(-1, 1),
+ }
+ }
else:
- assert score == 'IV-type'
- prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1),
- 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1),
- 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}}
+ assert score == "IV-type"
+ prediction_dict = {
+ "d": {
+ "ml_l": dml_plr_obj.predictions["ml_l"].reshape(-1, 1),
+ "ml_m": dml_plr_obj.predictions["ml_m"].reshape(-1, 1),
+ "ml_g": dml_plr_obj.predictions["ml_g"].reshape(-1, 1),
+ }
+ }
dml_plr_obj_ext.fit(external_predictions=prediction_dict)
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'coef_ext': dml_plr_obj_ext.coef.item(),
- 'se': dml_plr_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'se_ext': dml_plr_obj_ext.se.item(),
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "coef_ext": dml_plr_obj_ext.coef.item(),
+ "se": dml_plr_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "se_ext": dml_plr_obj_ext.se.item(),
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_plr(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
np.random.seed(3141)
dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
- res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap + "_ext"] = dml_plr_obj_ext.boot_t_stat
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_plr_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_plr(y, d.reshape(-1, 1),
- all_coef=dml_plr_obj.all_coef,
- predictions=dml_plr_obj.predictions,
- score=score,
- n_rep=1)
+ res_dict["sensitivity_elements"] = dml_plr_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_plr(
+ y, d.reshape(-1, 1), all_coef=dml_plr_obj.all_coef, predictions=dml_plr_obj.predictions, score=score, n_rep=1
+ )
# check if sensitivity score with rho=0 gives equal asymptotic standard deviation
dml_plr_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_plr_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_plr_obj.sensitivity_params["se"]
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['coef'],
- dml_plr_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_plr_fixture['coef'],
- dml_plr_fixture['coef_ext'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["coef"], dml_plr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["coef"], dml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_se(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['se'],
- dml_plr_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_plr_fixture['se'],
- dml_plr_fixture['se_ext'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["se"], dml_plr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["se"], dml_plr_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_boot(dml_plr_fixture):
- for bootstrap in dml_plr_fixture['boot_methods']:
- assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap],
- dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap],
- dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_fixture["boot_t_stat" + bootstrap],
+ dml_plr_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
+ assert np.allclose(
+ dml_plr_fixture["boot_t_stat" + bootstrap],
+ dml_plr_fixture["boot_t_stat" + bootstrap + "_ext"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_plr_sensitivity(dml_plr_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_plr_fixture['sensitivity_elements'][sensitivity_element],
- dml_plr_fixture['sensitivity_elements_manual'][sensitivity_element])
+ assert np.allclose(
+ dml_plr_fixture["sensitivity_elements"][sensitivity_element],
+ dml_plr_fixture["sensitivity_elements_manual"][sensitivity_element],
+ )
@pytest.mark.ci
def test_dml_plr_sensitivity_rho0(dml_plr_fixture):
- assert np.allclose(dml_plr_fixture['se'],
- dml_plr_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_fixture['se'],
- dml_plr_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_fixture["se"], dml_plr_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_fixture["se"], dml_plr_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4)
@pytest.fixture(scope="module")
def dml_plr_ols_manual_fixture(generate_data1, score):
learner = LinearRegression()
- boot_methods = ['Bayes', 'normal', 'wild']
+ boot_methods = ["Bayes", "normal", "wild"]
n_folds = 2
n_rep_boot = 501
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for m & g
ml_l = clone(learner)
ml_g = clone(learner)
ml_m = clone(learner)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
n = data.shape[0]
this_smpl = list()
- xx = int(n/2)
+ xx = int(n / 2)
this_smpl.append((np.arange(xx, n), np.arange(0, xx)))
this_smpl.append((np.arange(0, xx), np.arange(xx, n)))
smpls = [this_smpl]
@@ -221,9 +213,9 @@ def dml_plr_ols_manual_fixture(generate_data1, score):
dml_plr_obj.fit()
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
+ d = data["d"].values
# add column of ones for intercept
o = np.ones((n, 1))
@@ -233,7 +225,7 @@ def dml_plr_ols_manual_fixture(generate_data1, score):
l_hat = []
l_hat_vec = np.full_like(y, np.nan)
- for (train_index, test_index) in smpls:
+ for train_index, test_index in smpls:
ols_est = scipy.linalg.lstsq(x[train_index], y[train_index])[0]
preds = np.dot(x[test_index], ols_est)
l_hat.append(preds)
@@ -241,68 +233,67 @@ def dml_plr_ols_manual_fixture(generate_data1, score):
m_hat = []
m_hat_vec = np.full_like(d, np.nan)
- for (train_index, test_index) in smpls:
+ for train_index, test_index in smpls:
ols_est = scipy.linalg.lstsq(x[train_index], d[train_index])[0]
preds = np.dot(x[test_index], ols_est)
m_hat.append(preds)
m_hat_vec[test_index] = preds
g_hat = []
- if score == 'IV-type':
+ if score == "IV-type":
theta_initial = scipy.linalg.lstsq((d - m_hat_vec).reshape(-1, 1), y - l_hat_vec)[0]
- for (train_index, test_index) in smpls:
- ols_est = scipy.linalg.lstsq(x[train_index],
- y[train_index] - d[train_index] * theta_initial)[0]
+ for train_index, test_index in smpls:
+ ols_est = scipy.linalg.lstsq(x[train_index], y[train_index] - d[train_index] * theta_initial)[0]
g_hat.append(np.dot(x[test_index], ols_est))
- res_manual, se_manual = plr_dml2(y, x, d,
- l_hat, m_hat, g_hat,
- smpls, score)
+ res_manual, se_manual = plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score)
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': res_manual.item(),
- 'se': dml_plr_obj.se.item(),
- 'se_manual': se_manual.item(),
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": res_manual.item(),
+ "se": dml_plr_obj.se.item(),
+ "se_manual": se_manual.item(),
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_plr(y, d, [res_manual], [se_manual],
- [l_hat], [m_hat], [g_hat],
- [smpls], score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_plr(
+ y, d, [res_manual], [se_manual], [l_hat], [m_hat], [g_hat], [smpls], score, bootstrap, n_rep_boot
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_plr_ols_manual_coef(dml_plr_ols_manual_fixture):
- assert math.isclose(dml_plr_ols_manual_fixture['coef'],
- dml_plr_ols_manual_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_ols_manual_fixture["coef"], dml_plr_ols_manual_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_plr_ols_manual_se(dml_plr_ols_manual_fixture):
- assert math.isclose(dml_plr_ols_manual_fixture['se'],
- dml_plr_ols_manual_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_ols_manual_fixture["se"], dml_plr_ols_manual_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture):
- for bootstrap in dml_plr_ols_manual_fixture['boot_methods']:
- assert np.allclose(dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap],
- dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_ols_manual_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_ols_manual_fixture["boot_t_stat" + bootstrap],
+ dml_plr_ols_manual_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
-@pytest.fixture(scope='module',
- params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
+@pytest.fixture(scope="module", params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param
@@ -318,10 +309,7 @@ def test_dml_plr_cate_gate(score, cov_type):
ml_g = LinearRegression()
ml_m = LinearRegression()
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_g, ml_m, ml_l,
- n_folds=2,
- score=score)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, ml_l, n_folds=2, score=score)
dml_plr_obj.fit()
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
cate = dml_plr_obj.cate(random_basis, cov_type=cov_type)
@@ -330,10 +318,9 @@ def test_dml_plr_cate_gate(score, cov_type):
assert cate.blp_model.cov_type == cov_type
groups_1 = pd.DataFrame(
- np.column_stack([obj_dml_data.data['X1'] <= 0,
- obj_dml_data.data['X1'] > 0.2]),
- columns=['Group 1', 'Group 2'])
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ np.column_stack([obj_dml_data.data["X1"] <= 0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
+ )
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gate_1 = dml_plr_obj.gate(groups_1, cov_type=cov_type)
assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
@@ -343,7 +330,7 @@ def test_dml_plr_cate_gate(score, cov_type):
np.random.seed(42)
groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
- msg = ('At least one group effect is estimated with less than 6 observations.')
+ msg = "At least one group effect is estimated with less than 6 observations."
with pytest.warns(UserWarning, match=msg):
gate_2 = dml_plr_obj.gate(groups_2, cov_type=cov_type)
assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
diff --git a/doubleml/plm/tests/test_plr_classifier.py b/doubleml/plm/tests/test_plr_classifier.py
index 1fad89a93..6b331346d 100644
--- a/doubleml/plm/tests/test_plr_classifier.py
+++ b/doubleml/plm/tests/test_plr_classifier.py
@@ -1,52 +1,45 @@
-import numpy as np
-import pytest
import math
-from sklearn.linear_model import Lasso, LogisticRegression
+import numpy as np
+import pytest
from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import Lasso, LogisticRegression
import doubleml as dml
from doubleml.datasets import fetch_bonus
-from ...tests._utils import draw_smpls, _clone
-from ._utils_plr_manual import fit_plr, boot_plr
+from ...tests._utils import _clone, draw_smpls
+from ._utils_plr_manual import boot_plr, fit_plr
bonus_data = fetch_bonus()
-@pytest.fixture(scope='module',
- params=[Lasso(),
- RandomForestClassifier(max_depth=2, n_estimators=10),
- LogisticRegression()])
+@pytest.fixture(scope="module", params=[Lasso(), RandomForestClassifier(max_depth=2, n_estimators=10), LogisticRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
@pytest.fixture(scope="module")
def dml_plr_binary_classifier_fixture(learner, score):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 502
# Set machine learning methods for l, m & g
ml_l = Lasso(alpha=0.3)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = Lasso()
else:
ml_g = None
np.random.seed(3141)
- dml_plr_obj = dml.DoubleMLPLR(bonus_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ dml_plr_obj = dml.DoubleMLPLR(bonus_data, ml_l, ml_m, ml_g, n_folds, score=score)
dml_plr_obj.fit()
@@ -57,46 +50,60 @@ def dml_plr_binary_classifier_fixture(learner, score):
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds)
- res_manual = fit_plr(y, x, d, _clone(ml_l), _clone(ml_m), _clone(ml_g),
- all_smpls, score)
+ res_manual = fit_plr(y, x, d, _clone(ml_l), _clone(ml_m), _clone(ml_g), all_smpls, score)
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_plr_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_plr_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_plr(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_plr_binary_classifier_coef(dml_plr_binary_classifier_fixture):
- assert math.isclose(dml_plr_binary_classifier_fixture['coef'],
- dml_plr_binary_classifier_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_binary_classifier_fixture["coef"], dml_plr_binary_classifier_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_plr_binary_classifier_se(dml_plr_binary_classifier_fixture):
- assert math.isclose(dml_plr_binary_classifier_fixture['se'],
- dml_plr_binary_classifier_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_binary_classifier_fixture["se"], dml_plr_binary_classifier_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_plr_binary_classifier_boot(dml_plr_binary_classifier_fixture):
- for bootstrap in dml_plr_binary_classifier_fixture['boot_methods']:
- assert np.allclose(dml_plr_binary_classifier_fixture['boot_t_stat' + bootstrap],
- dml_plr_binary_classifier_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_binary_classifier_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_binary_classifier_fixture["boot_t_stat" + bootstrap],
+ dml_plr_binary_classifier_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_plr_external_predictions.py b/doubleml/plm/tests/test_plr_external_predictions.py
index a06d34ab6..47644555d 100644
--- a/doubleml/plm/tests/test_plr_external_predictions.py
+++ b/doubleml/plm/tests/test_plr_external_predictions.py
@@ -1,8 +1,10 @@
+import math
+
import numpy as np
import pytest
-import math
from sklearn.linear_model import LinearRegression
-from doubleml import DoubleMLPLR, DoubleMLData
+
+from doubleml import DoubleMLData, DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018
from doubleml.utils import DMLDummyRegressor
diff --git a/doubleml/plm/tests/test_plr_multi_treat.py b/doubleml/plm/tests/test_plr_multi_treat.py
index 82efbb739..0b4bea01c 100644
--- a/doubleml/plm/tests/test_plr_multi_treat.py
+++ b/doubleml/plm/tests/test_plr_multi_treat.py
@@ -1,44 +1,37 @@
import numpy as np
import pytest
-
-from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso
import doubleml as dml
-from ...tests._utils import draw_smpls, _clone
-from ._utils_plr_manual import fit_plr_multitreat, boot_plr_multitreat, fit_sensitivity_elements_plr
+from ...tests._utils import _clone, draw_smpls
+from ._utils_plr_manual import boot_plr_multitreat, fit_plr_multitreat, fit_sensitivity_elements_plr
-@pytest.fixture(scope='module',
- params=range(2))
+@pytest.fixture(scope="module", params=range(2))
def idx(request):
return request.param
-@pytest.fixture(scope='module',
- params=[Lasso(alpha=0.1),
- RandomForestRegressor(max_depth=2, n_estimators=10)])
+@pytest.fixture(scope="module", params=[Lasso(alpha=0.1), RandomForestRegressor(max_depth=2, n_estimators=10)])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module')
-def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz, idx, learner,
- score, n_rep):
- boot_methods = ['normal']
+@pytest.fixture(scope="module")
+def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz, idx, learner, score, n_rep):
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 483
@@ -48,106 +41,110 @@ def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz,
else:
assert idx == 1
data = generate_data_toeplitz
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
- d_cols = data.columns[data.columns.str.startswith('d')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+ d_cols = data.columns[data.columns.str.startswith("d")].tolist()
n_coefs = len(d_cols)
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', d_cols, x_cols)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds, n_rep,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", d_cols, x_cols)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score=score)
dml_plr_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
d = data.loc[:, d_cols].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds, n_rep)
- res_manual = fit_plr_multitreat(y, x, d,
- _clone(learner), _clone(learner), _clone(learner),
- all_smpls, score, n_rep=n_rep)
+ res_manual = fit_plr_multitreat(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, score, n_rep=n_rep)
- res_dict = {'coef': dml_plr_obj.coef,
- 'coef_manual': res_manual['theta'],
- 'se': dml_plr_obj.se,
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef,
+ "coef_manual": res_manual["theta"],
+ "se": dml_plr_obj.se,
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
boot_t_stat = boot_plr_multitreat(
- y, d,
- res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
- all_smpls, score,
- bootstrap, n_rep_boot, n_rep)
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep,
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, n_coefs, n_rep)
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, n_coefs, n_rep)
# sensitivity tests
- res_dict['sensitivity_elements'] = dml_plr_obj.sensitivity_elements
- res_dict['sensitivity_elements_manual'] = fit_sensitivity_elements_plr(y, d,
- all_coef=dml_plr_obj.all_coef,
- predictions=dml_plr_obj.predictions,
- score=score,
- n_rep=n_rep)
+ res_dict["sensitivity_elements"] = dml_plr_obj.sensitivity_elements
+ res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_plr(
+ y, d, all_coef=dml_plr_obj.all_coef, predictions=dml_plr_obj.predictions, score=score, n_rep=n_rep
+ )
# check if sensitivity score with rho=0 gives equal asymptotic standard deviation
dml_plr_obj.sensitivity_analysis(rho=0.0)
- res_dict['sensitivity_ses'] = dml_plr_obj.sensitivity_params['se']
+ res_dict["sensitivity_ses"] = dml_plr_obj.sensitivity_params["se"]
return res_dict
@pytest.mark.ci
def test_dml_plr_multitreat_coef(dml_plr_multitreat_fixture):
- assert np.allclose(dml_plr_multitreat_fixture['coef'],
- dml_plr_multitreat_fixture['coef_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_multitreat_fixture["coef"], dml_plr_multitreat_fixture["coef_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_plr_multitreat_se(dml_plr_multitreat_fixture):
- assert np.allclose(dml_plr_multitreat_fixture['se'],
- dml_plr_multitreat_fixture['se_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_multitreat_fixture["se"], dml_plr_multitreat_fixture["se_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_plr_multitreat_boot(dml_plr_multitreat_fixture):
- for bootstrap in dml_plr_multitreat_fixture['boot_methods']:
- assert np.allclose(dml_plr_multitreat_fixture['boot_t_stat' + bootstrap],
- dml_plr_multitreat_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_multitreat_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_multitreat_fixture["boot_t_stat" + bootstrap],
+ dml_plr_multitreat_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_plr_multitreat_sensitivity(dml_plr_multitreat_fixture):
- sensitivity_element_names = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2']
+ sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
for sensitivity_element in sensitivity_element_names:
- assert np.allclose(dml_plr_multitreat_fixture['sensitivity_elements'][sensitivity_element],
- dml_plr_multitreat_fixture['sensitivity_elements_manual'][sensitivity_element])
+ assert np.allclose(
+ dml_plr_multitreat_fixture["sensitivity_elements"][sensitivity_element],
+ dml_plr_multitreat_fixture["sensitivity_elements_manual"][sensitivity_element],
+ )
@pytest.mark.ci
def test_dml_plr_multitreat_sensitivity_rho0(dml_plr_multitreat_fixture):
- assert np.allclose(dml_plr_multitreat_fixture['se'],
- dml_plr_multitreat_fixture['sensitivity_ses']['lower'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_multitreat_fixture['se'],
- dml_plr_multitreat_fixture['sensitivity_ses']['upper'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_plr_multitreat_fixture["se"], dml_plr_multitreat_fixture["sensitivity_ses"]["lower"], rtol=1e-9, atol=1e-4
+ )
+ assert np.allclose(
+ dml_plr_multitreat_fixture["se"], dml_plr_multitreat_fixture["sensitivity_ses"]["upper"], rtol=1e-9, atol=1e-4
+ )
diff --git a/doubleml/plm/tests/test_plr_reestimate_from_scores.py b/doubleml/plm/tests/test_plr_reestimate_from_scores.py
index 2a555ec9b..9f44d61be 100644
--- a/doubleml/plm/tests/test_plr_reestimate_from_scores.py
+++ b/doubleml/plm/tests/test_plr_reestimate_from_scores.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.linear_model import LinearRegression
import doubleml as dml
@@ -9,20 +9,17 @@
from ...tests._utils import _clone
-@pytest.fixture(scope='module',
- params=[LinearRegression()])
+@pytest.fixture(scope="module", params=[LinearRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
@@ -33,53 +30,43 @@ def dml_plr_reestimate_fixture(generate_data1, learner, score, n_rep):
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- n_rep,
- score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score)
dml_plr_obj.fit()
np.random.seed(3141)
- dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- n_rep,
- score)
+ dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score)
dml_plr_obj2.fit()
dml_plr_obj2._coef[0] = np.nan
dml_plr_obj2._se[0] = np.nan
dml_plr_obj2._est_causal_pars_and_se()
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef2': dml_plr_obj2.coef.item(),
- 'se': dml_plr_obj.se.item(),
- 'se2': dml_plr_obj2.se.item()}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef2": dml_plr_obj2.coef.item(),
+ "se": dml_plr_obj.se.item(),
+ "se2": dml_plr_obj2.se.item(),
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_reestimate_fixture):
- assert math.isclose(dml_plr_reestimate_fixture['coef'],
- dml_plr_reestimate_fixture['coef2'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_reestimate_fixture["coef"], dml_plr_reestimate_fixture["coef2"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_se(dml_plr_reestimate_fixture):
- assert math.isclose(dml_plr_reestimate_fixture['se'],
- dml_plr_reestimate_fixture['se2'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_reestimate_fixture["se"], dml_plr_reestimate_fixture["se2"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/plm/tests/test_plr_rep_cross.py b/doubleml/plm/tests/test_plr_rep_cross.py
index 71b4dcea8..a5f237df1 100644
--- a/doubleml/plm/tests/test_plr_rep_cross.py
+++ b/doubleml/plm/tests/test_plr_rep_cross.py
@@ -1,142 +1,145 @@
-import numpy as np
-import pytest
import math
-from sklearn.linear_model import LinearRegression
+import numpy as np
+import pytest
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
import doubleml as dml
-from ...tests._utils import draw_smpls, _clone
-from ._utils_plr_manual import fit_plr, boot_plr
+from ...tests._utils import _clone, draw_smpls
+from ._utils_plr_manual import boot_plr, fit_plr
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=2, n_estimators=10),
- LinearRegression()])
+@pytest.fixture(scope="module", params=[RandomForestRegressor(max_depth=2, n_estimators=10), LinearRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[3])
+@pytest.fixture(scope="module", params=[3])
def n_rep(request):
return request.param
@pytest.fixture(scope="module")
def dml_plr_fixture(generate_data1, learner, score, n_rep):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 498
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- n_rep,
- score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score)
dml_plr_obj.fit()
np.random.seed(3141)
- y = data['y'].values
+ y = data["y"].values
x = data.loc[:, x_cols].values
- d = data['d'].values
+ d = data["d"].values
n_obs = len(y)
all_smpls = draw_smpls(n_obs, n_folds, n_rep)
- res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner),
- all_smpls, score, n_rep)
+ res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, score, n_rep)
np.random.seed(3141)
# test with external nuisance predictions
- if score == 'partialling out':
- dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds,
- n_rep,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds, n_rep, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- n_rep,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score=score)
# synchronize the sample splitting
dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls)
- if score == 'partialling out':
- prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep),
- 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep)}}
+ if score == "partialling out":
+ prediction_dict = {
+ "d": {
+ "ml_l": dml_plr_obj.predictions["ml_l"].reshape(-1, n_rep),
+ "ml_m": dml_plr_obj.predictions["ml_m"].reshape(-1, n_rep),
+ }
+ }
else:
- assert score == 'IV-type'
- prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep),
- 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep),
- 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}}
+ assert score == "IV-type"
+ prediction_dict = {
+ "d": {
+ "ml_l": dml_plr_obj.predictions["ml_l"].reshape(-1, n_rep),
+ "ml_m": dml_plr_obj.predictions["ml_m"].reshape(-1, n_rep),
+ "ml_g": dml_plr_obj.predictions["ml_g"].reshape(-1, n_rep),
+ }
+ }
dml_plr_obj_ext.fit(external_predictions=prediction_dict)
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'coef_ext': dml_plr_obj_ext.coef,
- 'se': dml_plr_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'se_ext': dml_plr_obj_ext.se,
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "coef_ext": dml_plr_obj_ext.coef,
+ "se": dml_plr_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "se_ext": dml_plr_obj_ext.se,
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot, n_rep)
+ boot_t_stat = boot_plr(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ n_rep,
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, n_rep)
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, n_rep)
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['coef'],
- dml_plr_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["coef"], dml_plr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_se(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['se'],
- dml_plr_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["se"], dml_plr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_boot(dml_plr_fixture):
- for bootstrap in dml_plr_fixture['boot_methods']:
- assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap],
- dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_fixture["boot_t_stat" + bootstrap],
+ dml_plr_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_plr_set_ml_nuisance_pars.py b/doubleml/plm/tests/test_plr_set_ml_nuisance_pars.py
index f86e2632b..08b6f5f16 100644
--- a/doubleml/plm/tests/test_plr_set_ml_nuisance_pars.py
+++ b/doubleml/plm/tests/test_plr_set_ml_nuisance_pars.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.linear_model import Lasso
import doubleml as dml
@@ -9,15 +9,14 @@
from ...tests._utils import _clone
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
@pytest.fixture(scope="module")
def dml_plr_fixture(generate_data1, score):
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 502
@@ -29,17 +28,14 @@ def dml_plr_fixture(generate_data1, score):
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'])
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"])
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
dml_plr_obj.fit()
@@ -48,36 +44,35 @@ def dml_plr_fixture(generate_data1, score):
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
- dml_plr_obj_ext_set_par = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
- dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_l', 'd', {'alpha': alpha})
- dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_m', 'd', {'alpha': alpha})
- if score == 'IV-type':
- dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_g', 'd', {'alpha': alpha})
+ dml_plr_obj_ext_set_par = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
+ dml_plr_obj_ext_set_par.set_ml_nuisance_params("ml_l", "d", {"alpha": alpha})
+ dml_plr_obj_ext_set_par.set_ml_nuisance_params("ml_m", "d", {"alpha": alpha})
+ if score == "IV-type":
+ dml_plr_obj_ext_set_par.set_ml_nuisance_params("ml_g", "d", {"alpha": alpha})
dml_plr_obj_ext_set_par.fit()
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': dml_plr_obj_ext_set_par.coef.item(),
- 'se': dml_plr_obj.se.item(),
- 'se_manual': dml_plr_obj_ext_set_par.se.item(),
- 'boot_methods': boot_methods}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": dml_plr_obj_ext_set_par.coef.item(),
+ "se": dml_plr_obj.se.item(),
+ "se_manual": dml_plr_obj_ext_set_par.se.item(),
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(314122)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
np.random.seed(314122)
dml_plr_obj_ext_set_par.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap + '_manual'] = dml_plr_obj_ext_set_par.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = dml_plr_obj_ext_set_par.boot_t_stat
return res_dict
@@ -85,23 +80,22 @@ def dml_plr_fixture(generate_data1, score):
@pytest.mark.ci
@pytest.mark.filterwarnings("ignore:Using the same")
def test_dml_plr_coef(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['coef'],
- dml_plr_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["coef"], dml_plr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
@pytest.mark.filterwarnings("ignore:Using the same")
def test_dml_plr_se(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['se'],
- dml_plr_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["se"], dml_plr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
@pytest.mark.filterwarnings("ignore:Using the same")
def test_dml_plr_boot(dml_plr_fixture):
- for bootstrap in dml_plr_fixture['boot_methods']:
- assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap],
- dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_fixture["boot_t_stat" + bootstrap],
+ dml_plr_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/plm/tests/test_plr_set_smpls_externally.py b/doubleml/plm/tests/test_plr_set_smpls_externally.py
index eb5cb1e4b..af81c9fc0 100644
--- a/doubleml/plm/tests/test_plr_set_smpls_externally.py
+++ b/doubleml/plm/tests/test_plr_set_smpls_externally.py
@@ -1,7 +1,7 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.linear_model import LinearRegression
import doubleml as dml
@@ -9,20 +9,17 @@
from ...tests._utils import _clone
-@pytest.fixture(scope='module',
- params=[LinearRegression()])
+@pytest.fixture(scope="module", params=[LinearRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
@@ -33,52 +30,43 @@ def dml_plr_smpls_fixture(generate_data1, learner, score, n_rep):
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m & g
ml_l = _clone(learner)
ml_m = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- n_rep,
- score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, n_rep, score)
dml_plr_obj.fit()
smpls = dml_plr_obj.smpls
- dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- score=score,
- draw_sample_splitting=False)
+ dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, score=score, draw_sample_splitting=False)
dml_plr_obj2.set_sample_splitting(smpls)
dml_plr_obj2.fit()
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef2': dml_plr_obj2.coef.item(),
- 'se': dml_plr_obj.se.item(),
- 'se2': dml_plr_obj2.se.item()}
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef2": dml_plr_obj2.coef.item(),
+ "se": dml_plr_obj.se.item(),
+ "se2": dml_plr_obj2.se.item(),
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_smpls_fixture):
- assert math.isclose(dml_plr_smpls_fixture['coef'],
- dml_plr_smpls_fixture['coef2'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_smpls_fixture["coef"], dml_plr_smpls_fixture["coef2"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_se(dml_plr_smpls_fixture):
- assert math.isclose(dml_plr_smpls_fixture['se'],
- dml_plr_smpls_fixture['se2'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_smpls_fixture["se"], dml_plr_smpls_fixture["se2"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/plm/tests/test_plr_tune.py b/doubleml/plm/tests/test_plr_tune.py
index c0ba637de..2e34a6d06 100644
--- a/doubleml/plm/tests/test_plr_tune.py
+++ b/doubleml/plm/tests/test_plr_tune.py
@@ -1,65 +1,55 @@
-import numpy as np
-import pytest
import math
-from sklearn.linear_model import Lasso, ElasticNet
+import numpy as np
+import pytest
+from sklearn.linear_model import ElasticNet, Lasso
import doubleml as dml
-from ...tests._utils import draw_smpls, _clone
-from ._utils_plr_manual import fit_plr, boot_plr, tune_nuisance_plr
+from ...tests._utils import _clone, draw_smpls
+from ._utils_plr_manual import boot_plr, fit_plr, tune_nuisance_plr
-@pytest.fixture(scope='module',
- params=[Lasso(),
- ElasticNet()])
+@pytest.fixture(scope="module", params=[Lasso(), ElasticNet()])
def learner_l(request):
return request.param
-@pytest.fixture(scope='module',
- params=[Lasso(),
- ElasticNet()])
+@pytest.fixture(scope="module", params=[Lasso(), ElasticNet()])
def learner_m(request):
return request.param
-@pytest.fixture(scope='module',
- params=[Lasso(),
- ElasticNet()])
+@pytest.fixture(scope="module", params=[Lasso(), ElasticNet()])
def learner_g(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out', 'IV-type'])
+@pytest.fixture(scope="module", params=["partialling out", "IV-type"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def tune_on_folds(request):
return request.param
def get_par_grid(learner):
if learner.__class__ == Lasso:
- par_grid = {'alpha': np.linspace(0.05, .95, 7)}
+ par_grid = {"alpha": np.linspace(0.05, 0.95, 7)}
else:
assert learner.__class__ == ElasticNet
- par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}
+ par_grid = {"l1_ratio": [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], "alpha": np.linspace(0.05, 1.0, 7)}
return par_grid
@pytest.fixture(scope="module")
def dml_plr_fixture(generate_data2, learner_l, learner_m, learner_g, score, tune_on_folds):
- par_grid = {'ml_l': get_par_grid(learner_l),
- 'ml_m': get_par_grid(learner_m),
- 'ml_g': get_par_grid(learner_g)}
+ par_grid = {"ml_l": get_par_grid(learner_l), "ml_m": get_par_grid(learner_m), "ml_g": get_par_grid(learner_g)}
n_folds_tune = 4
- boot_methods = ['normal']
+ boot_methods = ["normal"]
n_folds = 2
n_rep_boot = 502
@@ -69,20 +59,16 @@ def dml_plr_fixture(generate_data2, learner_l, learner_m, learner_g, score, tune
# Set machine learning methods for l, m & g
ml_l = _clone(learner_l)
ml_m = _clone(learner_m)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner_g)
else:
ml_g = None
np.random.seed(3141)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
# tune hyperparameters
- tune_res = dml_plr_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune,
- return_tune_res=True)
+ tune_res = dml_plr_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=True)
assert isinstance(tune_res, list)
# fit with tuned parameters
@@ -96,63 +82,104 @@ def dml_plr_fixture(generate_data2, learner_l, learner_m, learner_g, score, tune
all_smpls = draw_smpls(n_obs, n_folds)
smpls = all_smpls[0]
- tune_g = (score == 'IV-type')
+ tune_g = score == "IV-type"
if tune_on_folds:
- l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
- _clone(learner_l), _clone(learner_m), _clone(learner_g),
- smpls, n_folds_tune,
- par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'], tune_g)
+ l_params, m_params, g_params = tune_nuisance_plr(
+ y,
+ x,
+ d,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_g),
+ smpls,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_g"],
+ tune_g,
+ )
else:
xx = [(np.arange(len(y)), np.array([]))]
- l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
- _clone(learner_l), _clone(learner_m), _clone(learner_g),
- xx, n_folds_tune,
- par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'], tune_g)
+ l_params, m_params, g_params = tune_nuisance_plr(
+ y,
+ x,
+ d,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_g),
+ xx,
+ n_folds_tune,
+ par_grid["ml_l"],
+ par_grid["ml_m"],
+ par_grid["ml_g"],
+ tune_g,
+ )
l_params = l_params * n_folds
g_params = g_params * n_folds
m_params = m_params * n_folds
- res_manual = fit_plr(y, x, d, _clone(learner_l), _clone(learner_m), _clone(learner_g),
- all_smpls, score,
- l_params=l_params, m_params=m_params, g_params=g_params)
-
- res_dict = {'coef': dml_plr_obj.coef.item(),
- 'coef_manual': res_manual['theta'],
- 'se': dml_plr_obj.se.item(),
- 'se_manual': res_manual['se'],
- 'boot_methods': boot_methods}
+ res_manual = fit_plr(
+ y,
+ x,
+ d,
+ _clone(learner_l),
+ _clone(learner_m),
+ _clone(learner_g),
+ all_smpls,
+ score,
+ l_params=l_params,
+ m_params=m_params,
+ g_params=g_params,
+ )
+
+ res_dict = {
+ "coef": dml_plr_obj.coef.item(),
+ "coef_manual": res_manual["theta"],
+ "se": dml_plr_obj.se.item(),
+ "se_manual": res_manual["se"],
+ "boot_methods": boot_methods,
+ }
for bootstrap in boot_methods:
np.random.seed(3141)
- boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
- res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
- all_smpls, score, bootstrap, n_rep_boot)
+ boot_t_stat = boot_plr(
+ y,
+ d,
+ res_manual["thetas"],
+ res_manual["ses"],
+ res_manual["all_l_hat"],
+ res_manual["all_m_hat"],
+ res_manual["all_g_hat"],
+ all_smpls,
+ score,
+ bootstrap,
+ n_rep_boot,
+ )
np.random.seed(3141)
dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
- res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
- res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat.reshape(-1, 1, 1)
+ res_dict["boot_t_stat" + bootstrap] = dml_plr_obj.boot_t_stat
+ res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['coef'],
- dml_plr_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["coef"], dml_plr_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_se(dml_plr_fixture):
- assert math.isclose(dml_plr_fixture['se'],
- dml_plr_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(dml_plr_fixture["se"], dml_plr_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
@pytest.mark.ci
def test_dml_plr_boot(dml_plr_fixture):
- for bootstrap in dml_plr_fixture['boot_methods']:
- assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap],
- dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'],
- rtol=1e-9, atol=1e-4)
+ for bootstrap in dml_plr_fixture["boot_methods"]:
+ assert np.allclose(
+ dml_plr_fixture["boot_t_stat" + bootstrap],
+ dml_plr_fixture["boot_t_stat" + bootstrap + "_manual"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
diff --git a/doubleml/rdd/datasets/simple_dgp.py b/doubleml/rdd/datasets/simple_dgp.py
index abfd4ed39..cdde0fc11 100644
--- a/doubleml/rdd/datasets/simple_dgp.py
+++ b/doubleml/rdd/datasets/simple_dgp.py
@@ -53,10 +53,10 @@ def make_simple_rdd_data(n_obs=5000, p=4, fuzzy=True, binary_outcome=False, **kw
The oracle values contain the potential outcomes.
"""
- cutoff = kwargs.get('cutoff', 0.0)
- dim_x = kwargs.get('dim_x', 3)
- a = kwargs.get('a', 0.0)
- tau = kwargs.get('tau', 1.0)
+ cutoff = kwargs.get("cutoff", 0.0)
+ dim_x = kwargs.get("dim_x", 3)
+ a = kwargs.get("a", 0.0)
+ tau = kwargs.get("tau", 1.0)
score = np.random.normal(size=n_obs)
# independent covariates
@@ -95,14 +95,8 @@ def make_simple_rdd_data(n_obs=5000, p=4, fuzzy=True, binary_outcome=False, **kw
Y = Y0 * (1 - D) + Y1 * D
oracle_values = {
- 'Y0': Y0,
- 'Y1': Y1,
- }
- res_dict = {
- 'score': score,
- 'Y': Y,
- 'D': D,
- 'X': X,
- 'oracle_values': oracle_values
+ "Y0": Y0,
+ "Y1": Y1,
}
+ res_dict = {"score": score, "Y": Y, "D": D, "X": X, "oracle_values": oracle_values}
return res_dict
diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py
index 89aff94c5..858ae5ed1 100644
--- a/doubleml/rdd/rdd.py
+++ b/doubleml/rdd/rdd.py
@@ -1,24 +1,23 @@
import warnings
-import numpy as np
-import pandas as pd
from collections.abc import Callable
+import numpy as np
+import pandas as pd
from scipy.stats import norm
-
from sklearn.base import clone
from sklearn.utils.multiclass import type_of_target
from doubleml import DoubleMLData
from doubleml.double_ml import DoubleML
-from doubleml.utils.resampling import DoubleMLResampling
-from doubleml.utils._checks import _check_resampling_specification, _check_supports_sample_weights
from doubleml.rdd._utils import _is_rdrobust_available
+from doubleml.utils._checks import _check_resampling_specification, _check_supports_sample_weights
+from doubleml.utils.resampling import DoubleMLResampling
# validate optional rdrobust import
rdrobust = _is_rdrobust_available()
-class RDFlex():
+class RDFlex:
"""Flexible adjustment with double machine learning for regression discontinuity designs
Parameters
@@ -95,22 +94,22 @@ class RDFlex():
"""
- def __init__(self,
- obj_dml_data,
- ml_g,
- ml_m=None,
- fuzzy=False,
- cutoff=0,
- n_folds=5,
- n_rep=1,
- h_fs=None,
- fs_specification="cutoff",
- fs_kernel="triangular",
- **kwargs):
-
+ def __init__(
+ self,
+ obj_dml_data,
+ ml_g,
+ ml_m=None,
+ fuzzy=False,
+ cutoff=0,
+ n_folds=5,
+ n_rep=1,
+ h_fs=None,
+ fs_specification="cutoff",
+ fs_kernel="triangular",
+ **kwargs,
+ ):
if rdrobust is None:
- msg = ("rdrobust is not installed. "
- "Please install it using 'pip install DoubleML[rdd]'")
+ msg = "rdrobust is not installed. Please install it using 'pip install DoubleML[rdd]'"
raise ImportError(msg)
self._check_data(obj_dml_data, cutoff)
@@ -122,7 +121,7 @@ def __init__(self,
self._fuzzy = fuzzy
if not fuzzy and any(self._dml_data.d != self._intendend_treatment):
- warnings.warn('A sharp RD design is being estimated, but the data indicate that the design is fuzzy.')
+ warnings.warn("A sharp RD design is being estimated, but the data indicate that the design is fuzzy.")
self._check_and_set_learner(ml_g, ml_m)
@@ -132,14 +131,10 @@ def __init__(self,
if h_fs is None:
fuzzy = self._dml_data.d if self._fuzzy else None
- self._h_fs = rdrobust.rdbwselect(
- y=obj_dml_data.y,
- x=self._score,
- fuzzy=fuzzy).bws.values.flatten().max()
+ self._h_fs = rdrobust.rdbwselect(y=obj_dml_data.y, x=self._score, fuzzy=fuzzy).bws.values.flatten().max()
else:
if not isinstance(h_fs, (float)):
- raise TypeError("Initial bandwidth 'h_fs' has to be a float. "
- f'Object of type {str(type(h_fs))} passed.')
+ raise TypeError(f"Initial bandwidth 'h_fs' has to be a float. Object of type {str(type(h_fs))} passed.")
self._h_fs = h_fs
self._fs_specification = self._check_fs_specification(fs_specification)
@@ -151,11 +146,11 @@ def __init__(self,
# TODO: Add further input checks
self.kwargs = kwargs
- self._smpls = DoubleMLResampling(n_folds=self.n_folds, n_rep=self.n_rep, n_obs=obj_dml_data.n_obs,
- stratify=obj_dml_data.d).split_samples()
+ self._smpls = DoubleMLResampling(
+ n_folds=self.n_folds, n_rep=self.n_rep, n_obs=obj_dml_data.n_obs, stratify=obj_dml_data.d
+ ).split_samples()
- self._M_Y, self._M_D, self._h, self._rdd_obj, \
- self._all_coef, self._all_se, self._all_ci = self._initialize_arrays()
+ self._M_Y, self._M_D, self._h, self._rdd_obj, self._all_coef, self._all_se, self._all_ci = self._initialize_arrays()
# Initialize all properties to None
self._coef = None
@@ -199,10 +194,11 @@ def __str__(self):
result = "\n".join(lines)
additional_info = (
- "\nDesign Type: " + ("Fuzzy" if self.fuzzy else "Sharp") +
- f"\nCutoff: {self.cutoff}" +
- f"\nFirst Stage Kernel: {self.fs_kernel}" +
- f"\nFinal Bandwidth: {self.h}"
+ "\nDesign Type: "
+ + ("Fuzzy" if self.fuzzy else "Sharp")
+ + f"\nCutoff: {self.cutoff}"
+ + f"\nFirst Stage Kernel: {self.fs_kernel}"
+ + f"\nFinal Bandwidth: {self.h}"
)
return result + additional_info
@@ -345,13 +341,13 @@ def fit(self, n_iterations=2):
weights = self.w
for iteration in range(n_iterations):
- eta_Y = self._fit_nuisance_model(outcome=Y, estimator_name="ml_g",
- weights=weights, smpls=self._smpls[i_rep])
+ eta_Y = self._fit_nuisance_model(outcome=Y, estimator_name="ml_g", weights=weights, smpls=self._smpls[i_rep])
self._M_Y[:, i_rep] = Y - eta_Y
if self.fuzzy:
- eta_D = self._fit_nuisance_model(outcome=D, estimator_name="ml_m",
- weights=weights, smpls=self._smpls[i_rep])
+ eta_D = self._fit_nuisance_model(
+ outcome=D, estimator_name="ml_m", weights=weights, smpls=self._smpls[i_rep]
+ )
self._M_D[:, i_rep] = D - eta_D
# update weights via iterative bandwidth fitting
@@ -385,31 +381,28 @@ def confint(self, level=0.95):
A data frame with the confidence interval(s).
"""
if not isinstance(level, float):
- raise TypeError('The confidence level must be of float type. '
- f'{str(level)} of type {str(type(level))} was passed.')
+ raise TypeError(f"The confidence level must be of float type. {str(level)} of type {str(type(level))} was passed.")
if (level <= 0) | (level >= 1):
- raise ValueError('The confidence level must be in (0,1). '
- f'{str(level)} was passed.')
+ raise ValueError(f"The confidence level must be in (0,1). {str(level)} was passed.")
# compute critical values
alpha = 1 - level
- percentages = np.array([alpha / 2, 1. - alpha / 2])
+ percentages = np.array([alpha / 2, 1.0 - alpha / 2])
critical_values = np.repeat(norm.ppf(percentages[1]), self._n_rep)
# compute all cis over repetitions (shape: n_coef x 2 x n_rep)
self._all_cis = np.stack(
- (self.all_coef - self.all_se * critical_values,
- self.all_coef + self.all_se * critical_values),
- axis=1)
+ (self.all_coef - self.all_se * critical_values, self.all_coef + self.all_se * critical_values), axis=1
+ )
ci = np.median(self._all_cis, axis=2)
- df_ci = pd.DataFrame(ci, columns=['{:.1f} %'.format(i * 100) for i in percentages],
- index=['Conventional', 'Bias-Corrected', 'Robust'])
+ df_ci = pd.DataFrame(
+ ci, columns=["{:.1f} %".format(i * 100) for i in percentages], index=["Conventional", "Bias-Corrected", "Robust"]
+ )
return df_ci
def _fit_nuisance_model(self, outcome, estimator_name, weights, smpls):
-
# Include transformation of score and cutoff if necessary
if self._fs_specification == "cutoff":
Z = self._intendend_treatment # instrument for treatment
@@ -423,8 +416,9 @@ def _fit_nuisance_model(self, outcome, estimator_name, weights, smpls):
assert self._fs_specification == "interacted cutoff and score"
Z = np.column_stack((self._intendend_treatment, self._intendend_treatment * self._score, self._score))
Z_left = np.zeros_like(Z)
- Z_right = np.column_stack((np.ones_like(self._intendend_treatment), np.zeros_like(self._score),
- np.zeros_like(self._score)))
+ Z_right = np.column_stack(
+ (np.ones_like(self._intendend_treatment), np.zeros_like(self._score), np.zeros_like(self._score))
+ )
X = self._dml_data.x
ZX = np.column_stack((Z, X))
@@ -445,7 +439,7 @@ def _fit_nuisance_model(self, outcome, estimator_name, weights, smpls):
mu_left[test_index] = estimator.predict_proba(ZX_left[test_index])[:, 1]
mu_right[test_index] = estimator.predict_proba(ZX_right[test_index])[:, 1]
- return (mu_left + mu_right)/2
+ return (mu_left + mu_right) / 2
def _update_weights(self):
rdd_res = self._fit_rdd()
@@ -459,12 +453,10 @@ def _update_weights(self):
def _fit_rdd(self, h=None, b=None):
if self.fuzzy:
rdd_res = rdrobust.rdrobust(
- y=self._M_Y[:, self._i_rep], x=self._score,
- fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs)
+ y=self._M_Y[:, self._i_rep], x=self._score, fuzzy=self._M_D[:, self._i_rep], h=h, b=b, **self.kwargs
+ )
else:
- rdd_res = rdrobust.rdrobust(
- y=self._M_Y[:, self._i_rep], x=self._score,
- h=h, b=b, **self.kwargs)
+ rdd_res = rdrobust.rdrobust(y=self._M_Y[:, self._i_rep], x=self._score, h=h, b=b, **self.kwargs)
return rdd_res
def _set_coefs(self, rdd_res, h):
@@ -491,79 +483,86 @@ def _initialize_arrays(self):
def _check_data(self, obj_dml_data, cutoff):
if not isinstance(obj_dml_data, DoubleMLData):
- raise TypeError('The data must be of DoubleMLData type. '
- f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.')
+ raise TypeError(
+ f"The data must be of DoubleMLData type. {str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed."
+ )
# score checks
if obj_dml_data.s_col is None:
- raise ValueError('Incompatible data. ' +
- 'Score variable has not been set. ')
- is_continuous = (type_of_target(obj_dml_data.s) == 'continuous')
+ raise ValueError("Incompatible data. " + "Score variable has not been set. ")
+ is_continuous = type_of_target(obj_dml_data.s) == "continuous"
if not is_continuous:
- raise ValueError('Incompatible data. ' +
- 'Score variable has to be continuous. ')
+ raise ValueError("Incompatible data. " + "Score variable has to be continuous. ")
if not isinstance(cutoff, (int, float)):
- raise TypeError('Cutoff value has to be a float or int. '
- f'Object of type {str(type(cutoff))} passed.')
+ raise TypeError(f"Cutoff value has to be a float or int. Object of type {str(type(cutoff))} passed.")
if not (obj_dml_data.s.min() <= cutoff <= obj_dml_data.s.max()):
- raise ValueError('Cutoff value is not within the range of the score variable. ')
+ raise ValueError("Cutoff value is not within the range of the score variable. ")
# treatment checks
- one_treat = (obj_dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
+ one_treat = obj_dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml_data.d) == "binary"
zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- 'To fit an RDFlex model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ raise ValueError(
+ "Incompatible data. "
+ "To fit an RDFlex model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
# instrument checks
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). ')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ )
def _check_and_set_learner(self, ml_g, ml_m):
# check ml_g
- ml_g_is_classifier = DoubleML._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)
- _check_supports_sample_weights(ml_g, 'ml_g')
- self._learner = {'ml_g': ml_g}
+ ml_g_is_classifier = DoubleML._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+ _check_supports_sample_weights(ml_g, "ml_g")
+ self._learner = {"ml_g": ml_g}
if ml_g_is_classifier:
if self._dml_data.binary_outcome:
- self._predict_method = {'ml_g': 'predict_proba'}
+ self._predict_method = {"ml_g": "predict_proba"}
else:
- raise ValueError(f'The ml_g learner {str(ml_g)} was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ raise ValueError(
+ f"The ml_g learner {str(ml_g)} was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
else:
- self._predict_method = {'ml_g': 'predict'}
+ self._predict_method = {"ml_g": "predict"}
# check ml_m
if self._fuzzy:
if ml_m is not None:
- _ = DoubleML._check_learner(ml_m, 'ml_m', regressor=False, classifier=True)
- _check_supports_sample_weights(ml_m, 'ml_m')
+ _ = DoubleML._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+ _check_supports_sample_weights(ml_m, "ml_m")
- self._learner['ml_m'] = ml_m
- self._predict_method['ml_m'] = 'predict_proba'
+ self._learner["ml_m"] = ml_m
+ self._predict_method["ml_m"] = "predict_proba"
else:
- raise ValueError('Fuzzy design requires a classifier ml_m for treatment assignment.')
+ raise ValueError("Fuzzy design requires a classifier ml_m for treatment assignment.")
else:
if ml_m is not None:
- warnings.warn(('A learner ml_m has been provided for for a sharp design but will be ignored. '
- 'A learner ml_m is not required for estimation.'))
+ warnings.warn(
+ (
+ "A learner ml_m has been provided for for a sharp design but will be ignored. "
+ "A learner ml_m is not required for estimation."
+ )
+ )
def _check_and_set_kernel(self, fs_kernel):
if not isinstance(fs_kernel, (str, Callable)):
- raise TypeError('fs_kernel must be either a string or a callable. '
- f'{str(fs_kernel)} of type {str(type(fs_kernel))} was passed.')
+ raise TypeError(
+ f"fs_kernel must be either a string or a callable. {str(fs_kernel)} of type {str(type(fs_kernel))} was passed."
+ )
kernel_functions = {
"uniform": lambda x, h: np.array(np.abs(x) <= h, dtype=float),
"triangular": lambda x, h: np.array(np.maximum(0, (h - np.abs(x)) / h), dtype=float),
- "epanechnikov": lambda x, h: np.array(np.where(np.abs(x) < h, .75 * (1 - np.square(x / h)), 0), dtype=float)
+ "epanechnikov": lambda x, h: np.array(np.where(np.abs(x) < h, 0.75 * (1 - np.square(x / h)), 0), dtype=float),
}
if isinstance(fs_kernel, str):
@@ -577,36 +576,44 @@ def _check_and_set_kernel(self, fs_kernel):
else:
assert callable(fs_kernel)
kernel_function = fs_kernel
- kernel_name = 'custom_kernel'
+ kernel_name = "custom_kernel"
return kernel_function, kernel_name
def _check_fs_specification(self, fs_specification):
if not isinstance(fs_specification, str):
- raise TypeError("fs_specification must be a string. "
- f'{str(fs_specification)} of type {str(type(fs_specification))} was passed.')
+ raise TypeError(
+ f"fs_specification must be a string. {str(fs_specification)} of type {str(type(fs_specification))} was passed."
+ )
expected_specifications = ["cutoff", "cutoff and score", "interacted cutoff and score"]
if fs_specification not in expected_specifications:
- raise ValueError(f"Invalid fs_specification '{fs_specification}'. "
- f"Valid specifications are {expected_specifications}.")
+ raise ValueError(
+ f"Invalid fs_specification '{fs_specification}'. Valid specifications are {expected_specifications}."
+ )
return fs_specification
def _check_iterations(self, n_iterations):
"""Validate the number of iterations."""
if not isinstance(n_iterations, int):
- raise TypeError('The number of iterations for the iterative bandwidth fitting must be of int type. '
- f'{str(n_iterations)} of type {str(type(n_iterations))} was passed.')
+ raise TypeError(
+ "The number of iterations for the iterative bandwidth fitting must be of int type. "
+ f"{str(n_iterations)} of type {str(type(n_iterations))} was passed."
+ )
if n_iterations < 1:
- raise ValueError('The number of iterations for the iterative bandwidth fitting has to be positive. '
- f'{str(n_iterations)} was passed.')
+ raise ValueError(
+ "The number of iterations for the iterative bandwidth fitting has to be positive. "
+ f"{str(n_iterations)} was passed."
+ )
def _check_effect_sign(self, tolerance=1e-6):
d_left, d_right = self._dml_data.d[self._score < 0], self._dml_data.d[self._score > 0]
w_left, w_right = self._w[self._score < 0], self._w[self._score > 0]
treatment_prob_difference = np.average(d_left, weights=w_left) - np.average(d_right, weights=w_right)
if treatment_prob_difference > tolerance:
- warnings.warn("Treatment probability within bandwidth left from cutoff higher than right from cutoff.\n"
- "Treatment assignment might be based on the wrong side of the cutoff.")
+ warnings.warn(
+ "Treatment probability within bandwidth left from cutoff higher than right from cutoff.\n"
+ "Treatment assignment might be based on the wrong side of the cutoff."
+ )
def aggregate_over_splits(self):
var_scaling_factors = np.array([np.sum(res.N_h) for res in self._rdd_obj])
diff --git a/doubleml/rdd/tests/conftest.py b/doubleml/rdd/tests/conftest.py
index 693c198cd..b279ea93c 100644
--- a/doubleml/rdd/tests/conftest.py
+++ b/doubleml/rdd/tests/conftest.py
@@ -1,68 +1,54 @@
-import pytest
-
import numpy as np
import pandas as pd
+import pytest
+from sklearn.dummy import DummyClassifier, DummyRegressor
-from doubleml.rdd.datasets import make_simple_rdd_data
from doubleml import DoubleMLData
from doubleml.rdd import RDFlex
-
-from sklearn.dummy import DummyRegressor, DummyClassifier
-
from doubleml.rdd._utils import _is_rdrobust_available
+from doubleml.rdd.datasets import make_simple_rdd_data
+
# validate optional rdrobust import
rdrobust = _is_rdrobust_available()
DATA_SIZE = 500
-ml_g_dummy = DummyRegressor(strategy='constant', constant=0)
-ml_m_dummy = DummyClassifier(strategy='constant', constant=0)
+ml_g_dummy = DummyRegressor(strategy="constant", constant=0)
+ml_m_dummy = DummyClassifier(strategy="constant", constant=0)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_dummy():
"""
- make predictions using RDFlex with a constant model
- make predictions using rdrobust as a reference
"""
+
def _predict_dummy(data: DoubleMLData, cutoff, alpha, n_rep, p, fs_specification, ml_g=ml_g_dummy):
dml_rdflex = RDFlex(
- data,
- ml_g=ml_g,
- ml_m=ml_m_dummy,
- cutoff=cutoff,
- n_rep=n_rep,
- p=p,
- fs_specification=fs_specification
+ data, ml_g=ml_g, ml_m=ml_m_dummy, cutoff=cutoff, n_rep=n_rep, p=p, fs_specification=fs_specification
)
dml_rdflex.fit(n_iterations=1)
- ci_manual = dml_rdflex.confint(level=1-alpha)
+ ci_manual = dml_rdflex.confint(level=1 - alpha)
if rdrobust is None:
- msg = ("rdrobust is not installed. "
- "Please install it using 'pip install DoubleML[rdd]'")
+ msg = "rdrobust is not installed. Please install it using 'pip install DoubleML[rdd]'"
raise ImportError(msg)
- rdrobust_model = rdrobust.rdrobust(
- y=data.y,
- x=data.s,
- c=cutoff,
- level=100*(1-alpha),
- p=p
- )
+ rdrobust_model = rdrobust.rdrobust(y=data.y, x=data.s, c=cutoff, level=100 * (1 - alpha), p=p)
reference = {
- 'model': rdrobust_model,
- 'coef': rdrobust_model.coef.values.flatten(),
- 'se': rdrobust_model.se.values.flatten(),
- 'ci': rdrobust_model.ci.values
+ "model": rdrobust_model,
+ "coef": rdrobust_model.coef.values.flatten(),
+ "se": rdrobust_model.se.values.flatten(),
+ "ci": rdrobust_model.ci.values,
}
actual = {
- 'model': dml_rdflex,
- 'coef': dml_rdflex.coef,
- 'se': dml_rdflex.se,
- 'ci': ci_manual,
+ "model": dml_rdflex,
+ "coef": dml_rdflex.coef,
+ "se": dml_rdflex.se,
+ "ci": ci_manual,
}
return reference, actual
@@ -70,66 +56,62 @@ def _predict_dummy(data: DoubleMLData, cutoff, alpha, n_rep, p, fs_specification
def defier_mask(fuzzy, data, actual_cutoff):
- if fuzzy == 'left':
+ if fuzzy == "left":
        # right defiers (not treated even though the score suggested treatment)
- return (data['D'] == 0) & (data['score'] >= actual_cutoff)
- elif fuzzy == 'right':
+ return (data["D"] == 0) & (data["score"] >= actual_cutoff)
+ elif fuzzy == "right":
        # left defiers (treated even though the score did not suggest treatment)
- return (data['D'] == 1) & (data['score'] < actual_cutoff)
- elif fuzzy in ['both', 'none']:
+ return (data["D"] == 1) & (data["score"] < actual_cutoff)
+ elif fuzzy in ["both", "none"]:
return None
- raise ValueError(f'Invalid type of fuzzyness {fuzzy}')
+ raise ValueError(f"Invalid type of fuzzyness {fuzzy}")
-def generate_data(
- n_obs: int,
- fuzzy: str,
- cutoff: float,
- binary_outcome: bool = False
-):
+def generate_data(n_obs: int, fuzzy: str, cutoff: float, binary_outcome: bool = False):
data = make_simple_rdd_data(
n_obs=n_obs,
- fuzzy=fuzzy in ['both', 'left', 'right'],
+ fuzzy=fuzzy in ["both", "left", "right"],
cutoff=cutoff,
binary_outcome=binary_outcome,
)
mask = defier_mask(fuzzy, data, cutoff)
if mask is not None:
- data = {k: v[~mask] for k, v in data.items() if k != 'oracle_values'}
+ data = {k: v[~mask] for k, v in data.items() if k != "oracle_values"}
- columns = ['y', 'd', 'score'] + ['x' + str(i) for i in range(data['X'].shape[1])]
- df = pd.DataFrame(
- np.column_stack((data['Y'], data['D'], data['score'], data['X'])),
- columns=columns
- )
- return DoubleMLData(df, y_col='y', d_cols='d', s_col='score')
+ columns = ["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])]
+ df = pd.DataFrame(np.column_stack((data["Y"], data["D"], data["score"], data["X"])), columns=columns)
+ return DoubleMLData(df, y_col="y", d_cols="d", s_col="score")
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def rdd_sharp_data():
def _rdd_sharp_data(cutoff, binary_outcome=False):
- return generate_data(n_obs=DATA_SIZE, fuzzy='none', cutoff=cutoff, binary_outcome=binary_outcome)
+ return generate_data(n_obs=DATA_SIZE, fuzzy="none", cutoff=cutoff, binary_outcome=binary_outcome)
+
return _rdd_sharp_data
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def rdd_fuzzy_data():
def _rdd_fuzzy_data(cutoff, binary_outcome=False):
- return generate_data(n_obs=DATA_SIZE, fuzzy='both', cutoff=cutoff, binary_outcome=binary_outcome)
+ return generate_data(n_obs=DATA_SIZE, fuzzy="both", cutoff=cutoff, binary_outcome=binary_outcome)
+
return _rdd_fuzzy_data
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def rdd_fuzzy_left_data():
def _rdd_fuzzy_left_data(cutoff, binary_outcome=False):
- return generate_data(n_obs=DATA_SIZE, fuzzy='left', cutoff=cutoff, binary_outcome=binary_outcome)
+ return generate_data(n_obs=DATA_SIZE, fuzzy="left", cutoff=cutoff, binary_outcome=binary_outcome)
+
return _rdd_fuzzy_left_data
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def rdd_fuzzy_right_data():
def _rdd_fuzzy_right_data(cutoff, binary_outcome=False):
- data = generate_data(n_obs=DATA_SIZE, fuzzy='left', cutoff=cutoff, binary_outcome=binary_outcome)
+ data = generate_data(n_obs=DATA_SIZE, fuzzy="left", cutoff=cutoff, binary_outcome=binary_outcome)
return data
+
return _rdd_fuzzy_right_data
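The conftest above relies on pytest's factory-fixture pattern: a module-scoped fixture returns an inner function, so each test can build data lazily for its own cutoff while sharing a single fixture definition. A minimal sketch of the pattern (fixture and test names are illustrative):

```python
import pytest

@pytest.fixture(scope="module")
def make_data():
    # The fixture returns a factory, not the data itself.
    def _make_data(cutoff, binary_outcome=False):
        return {"cutoff": cutoff, "binary_outcome": binary_outcome}

    return _make_data

def test_uses_factory(make_data):
    data = make_data(cutoff=0.0)
    assert data["cutoff"] == 0.0
```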
diff --git a/doubleml/rdd/tests/test_rdd_classifier.py b/doubleml/rdd/tests/test_rdd_classifier.py
index 8054892f9..199fe327d 100644
--- a/doubleml/rdd/tests/test_rdd_classifier.py
+++ b/doubleml/rdd/tests/test_rdd_classifier.py
@@ -1,12 +1,11 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
+from sklearn.linear_model import LogisticRegression
-from doubleml.rdd.datasets import make_simple_rdd_data
import doubleml as dml
from doubleml.rdd import RDFlex
-
-from sklearn.linear_model import LogisticRegression
+from doubleml.rdd.datasets import make_simple_rdd_data
np.random.seed(3141)
@@ -16,10 +15,10 @@
df = pd.DataFrame(
- np.column_stack((data['Y_bin'], data['D'], data['score'], data['X'])),
- columns=['y', 'd', 'score'] + ['x' + str(i) for i in range(data['X'].shape[1])]
+ np.column_stack((data["Y_bin"], data["D"], data["score"], data["X"])),
+ columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])],
)
-dml_data = dml.DoubleMLData(df, y_col='y', d_cols='d', s_col='score')
+dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", s_col="score")
@pytest.mark.ci_rdd
diff --git a/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py b/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py
index 56be5fc02..34f97692e 100644
--- a/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py
+++ b/doubleml/rdd/tests/test_rdd_classifier_fuzzy.py
@@ -1,61 +1,57 @@
"""
Dummy test using a fixed learner for fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
from sklearn.dummy import DummyClassifier
-ml_g_dummy = DummyClassifier(strategy='constant', constant=0)
+ml_g_dummy = DummyClassifier(strategy="constant", constant=0)
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_data, cutoff):
return rdd_fuzzy_data(cutoff, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_data):
return rdd_fuzzy_data(0.0, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
@@ -65,34 +61,34 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py b/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py
index ff65bff2f..5fee4acfa 100644
--- a/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py
+++ b/doubleml/rdd/tests/test_rdd_classifier_fuzzy_left.py
@@ -1,61 +1,57 @@
"""
Dummy test using a fixed learner for left-sided fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
from sklearn.dummy import DummyClassifier
-ml_g_dummy = DummyClassifier(strategy='constant', constant=0)
+ml_g_dummy = DummyClassifier(strategy="constant", constant=0)
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_left_data, cutoff):
return rdd_fuzzy_left_data(cutoff, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_left_data):
return rdd_fuzzy_left_data(0.0, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
@@ -65,34 +61,34 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_classifier_fuzzy_right.py b/doubleml/rdd/tests/test_rdd_classifier_fuzzy_right.py
index 2969663bc..f0fdd66c2 100644
--- a/doubleml/rdd/tests/test_rdd_classifier_fuzzy_right.py
+++ b/doubleml/rdd/tests/test_rdd_classifier_fuzzy_right.py
@@ -1,61 +1,57 @@
"""
Dummy test using a fixed learner for right-sided fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
from sklearn.dummy import DummyClassifier
-ml_g_dummy = DummyClassifier(strategy='constant', constant=0)
+ml_g_dummy = DummyClassifier(strategy="constant", constant=0)
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_right_data, cutoff):
return rdd_fuzzy_right_data(cutoff, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_right_data):
return rdd_fuzzy_right_data(0.0, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
@@ -65,34 +61,34 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_classifier_sharp.py b/doubleml/rdd/tests/test_rdd_classifier_sharp.py
index b0a742a40..f2efe736a 100644
--- a/doubleml/rdd/tests/test_rdd_classifier_sharp.py
+++ b/doubleml/rdd/tests/test_rdd_classifier_sharp.py
@@ -1,61 +1,57 @@
"""
Dummy test using a fixed learner for sharp data
"""
-import pytest
+
import numpy as np
+import pytest
from sklearn.dummy import DummyClassifier
-ml_g_dummy = DummyClassifier(strategy='constant', constant=0)
+ml_g_dummy = DummyClassifier(strategy="constant", constant=0)
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_sharp_data, cutoff):
return rdd_sharp_data(cutoff, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_sharp_data):
return rdd_sharp_data(0.0, binary_outcome=True)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
return predict_dummy(
data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification, ml_g=ml_g_dummy
@@ -65,34 +61,34 @@ def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specific
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_default_values.py b/doubleml/rdd/tests/test_rdd_default_values.py
index edd58b3e9..2f0657f15 100644
--- a/doubleml/rdd/tests/test_rdd_default_values.py
+++ b/doubleml/rdd/tests/test_rdd_default_values.py
@@ -1,22 +1,21 @@
-import pytest
import numpy as np
import pandas as pd
+import pytest
+from sklearn.linear_model import Lasso, LogisticRegression
import doubleml as dml
from doubleml.rdd import RDFlex
from doubleml.rdd.datasets import make_simple_rdd_data
-from sklearn.linear_model import Lasso, LogisticRegression
-
np.random.seed(3141)
n_obs = 300
data = make_simple_rdd_data(n_obs=n_obs, fuzzy=False)
df = pd.DataFrame(
- np.column_stack((data['Y'], data['D'], data['score'], data['X'])),
- columns=['y', 'd', 'score'] + ['x' + str(i) for i in range(data['X'].shape[1])]
+ np.column_stack((data["Y"], data["D"], data["score"], data["X"])),
+ columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])],
)
-dml_data = dml.DoubleMLData(df, y_col='y', d_cols='d', s_col='score')
+dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", s_col="score")
def _assert_resampling_default_settings(dml_obj):
diff --git a/doubleml/rdd/tests/test_rdd_exceptions.py b/doubleml/rdd/tests/test_rdd_exceptions.py
index beee396d0..6abf901eb 100644
--- a/doubleml/rdd/tests/test_rdd_exceptions.py
+++ b/doubleml/rdd/tests/test_rdd_exceptions.py
@@ -1,23 +1,23 @@
-import pytest
-import pandas as pd
-import numpy as np
import copy
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
+from sklearn.linear_model import Lasso, LogisticRegression
+
from doubleml import DoubleMLData
-from doubleml.rdd.datasets import make_simple_rdd_data
from doubleml.rdd import RDFlex
-
-from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
-from sklearn.linear_model import Lasso, LogisticRegression
+from doubleml.rdd.datasets import make_simple_rdd_data
n = 500
data = make_simple_rdd_data(n_obs=n, fuzzy=False)
df = pd.DataFrame(
- np.column_stack((data['Y'], data['D'], data['score'], data['X'])),
- columns=['y', 'd', 'score'] + ['x' + str(i) for i in range(data['X'].shape[1])]
+ np.column_stack((data["Y"], data["D"], data["score"], data["X"])),
+ columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])],
)
-dml_data = DoubleMLData(df, y_col='y', d_cols='d', s_col='score')
+dml_data = DoubleMLData(df, y_col="y", d_cols="d", s_col="score")
ml_g = Lasso()
ml_m = LogisticRegression()
@@ -29,6 +29,7 @@ class DummyRegressorNoSampleWeight(BaseEstimator, RegressorMixin):
A dummy regressor that predicts the mean of the target values,
and does not support sample weights.
"""
+
def fit(self, X, y):
self.mean_ = np.mean(y)
return self
@@ -42,6 +43,7 @@ class DummyClassifierNoSampleWeight(BaseEstimator, ClassifierMixin):
A dummy classifier that predicts the most frequent class,
and does not support sample weights.
"""
+
def fit(self, X, y):
self.classes_, self.counts_ = np.unique(y, return_counts=True)
self.most_frequent_ = self.classes_[np.argmax(self.counts_)]
@@ -51,10 +53,7 @@ def predict(self, X):
return np.full(shape=(X.shape[0],), fill_value=self.most_frequent_)
def predict_proba(self, X):
- return np.column_stack(
- (np.full(shape=(X.shape[0],), fill_value=1),
- np.full(shape=(X.shape[0],), fill_value=0))
- )
+ return np.column_stack((np.full(shape=(X.shape[0],), fill_value=1), np.full(shape=(X.shape[0],), fill_value=0)))
@pytest.mark.ci_rdd
@@ -65,39 +64,41 @@ def test_rdd_exception_data():
_ = RDFlex([], ml_g)
# score column
- msg = 'Incompatible data. Score variable has not been set. '
+ msg = "Incompatible data. Score variable has not been set. "
with pytest.raises(ValueError, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
tmp_dml_data._s_col = None
_ = RDFlex(tmp_dml_data, ml_g)
- msg = 'Incompatible data. Score variable has to be continuous. '
+ msg = "Incompatible data. Score variable has to be continuous. "
with pytest.raises(ValueError, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
tmp_dml_data._s = tmp_dml_data._d
_ = RDFlex(tmp_dml_data, ml_g)
# existing instruments
- msg = r'Incompatible data. x0 have been set as instrumental variable\(s\). '
+ msg = r"Incompatible data. x0 have been set as instrumental variable\(s\). "
with pytest.raises(ValueError, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
- tmp_dml_data._z_cols = ['x0']
+ tmp_dml_data._z_cols = ["x0"]
_ = RDFlex(tmp_dml_data, ml_g)
# treatment exceptions
- msg = ('Incompatible data. '
- 'To fit an RDFlex model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. "
+ "To fit an RDFlex model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
# multiple treatment variables
with pytest.raises(ValueError, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
- tmp_dml_data._d_cols = ['d', 'x0']
+ tmp_dml_data._d_cols = ["d", "x0"]
_ = RDFlex(tmp_dml_data, ml_g)
# non-binary treatment
with pytest.raises(ValueError, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
- tmp_dml_data.x_cols = ['x1'] # reset x to only x1 to enable setting d to x0
- tmp_dml_data.d_cols = ['x0']
+ tmp_dml_data.x_cols = ["x1"] # reset x to only x1 to enable setting d to x0
+ tmp_dml_data.d_cols = ["x0"]
_ = RDFlex(tmp_dml_data, ml_g)
@@ -107,62 +108,69 @@ def test_rdd_exception_cutoff():
with pytest.raises(TypeError, match=msg):
_ = RDFlex(dml_data, ml_g, cutoff=[200])
- msg = 'Cutoff value is not within the range of the score variable. '
+ msg = "Cutoff value is not within the range of the score variable. "
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g, cutoff=200)
@pytest.mark.ci_rdd
def test_rdd_warning_fuzzy():
- msg = 'A sharp RD design is being estimated, but the data indicate that the design is fuzzy.'
+ msg = "A sharp RD design is being estimated, but the data indicate that the design is fuzzy."
with pytest.warns(UserWarning, match=msg):
_ = RDFlex(dml_data, ml_g, cutoff=0.1)
@pytest.mark.ci_rdd
def test_rdd_warning_treatment_assignment():
- msg = ("Treatment probability within bandwidth left from cutoff higher than right from cutoff.\n"
- "Treatment assignment might be based on the wrong side of the cutoff.")
+ msg = (
+ "Treatment probability within bandwidth left from cutoff higher than right from cutoff.\n"
+ "Treatment assignment might be based on the wrong side of the cutoff."
+ )
with pytest.warns(UserWarning, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
- tmp_dml_data._s = -1.0*tmp_dml_data._s
+ tmp_dml_data._s = -1.0 * tmp_dml_data._s
_ = RDFlex(tmp_dml_data, ml_g, ml_m, fuzzy=True)
@pytest.mark.ci_rdd
-@pytest.mark.filterwarnings(
- "ignore:Learner provided for ml_m is probably invalid.*no classifier.*:UserWarning"
-)
+@pytest.mark.filterwarnings("ignore:Learner provided for ml_m is probably invalid.*no classifier.*:UserWarning")
def test_rdd_exception_learner():
-
# ml_g
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not'
- ' binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier but the outcome variable is not"
+ " binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g=LogisticRegression())
- msg = (r"The ml_g learner DummyRegressorNoSampleWeight\(\) does not support sample weights. Please choose a learner"
- " that supports sample weights.")
+ msg = (
+ r"The ml_g learner DummyRegressorNoSampleWeight\(\) does not support sample weights. Please choose a learner"
+ " that supports sample weights."
+ )
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g=DummyRegressorNoSampleWeight(), ml_m=ml_m)
# ml_m
- msg = r'Invalid learner provided for ml_m: Lasso\(\) has no method .predict_proba\(\).'
+ msg = r"Invalid learner provided for ml_m: Lasso\(\) has no method .predict_proba\(\)."
with pytest.raises(TypeError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m=Lasso(), fuzzy=True)
- msg = 'Fuzzy design requires a classifier ml_m for treatment assignment.'
+ msg = "Fuzzy design requires a classifier ml_m for treatment assignment."
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g, fuzzy=True)
- msg = (r"The ml_m learner DummyClassifierNoSampleWeight\(\) does not support sample weights. Please choose a learner"
- " that supports sample weights.")
+ msg = (
+ r"The ml_m learner DummyClassifierNoSampleWeight\(\) does not support sample weights. Please choose a learner"
+ " that supports sample weights."
+ )
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m=DummyClassifierNoSampleWeight(), fuzzy=True)
- msg = ('A learner ml_m has been provided for for a sharp design but will be ignored. '
- 'A learner ml_m is not required for estimation.')
+ msg = (
+ "A learner ml_m has been provided for for a sharp design but will be ignored. "
+ "A learner ml_m is not required for estimation."
+ )
with pytest.warns(UserWarning, match=msg):
tmp_dml_data = copy.deepcopy(dml_data)
- tmp_dml_data._data['sharp_d'] = (tmp_dml_data.s >= 0)
- tmp_dml_data.d_cols = 'sharp_d'
+ tmp_dml_data._data["sharp_d"] = tmp_dml_data.s >= 0
+ tmp_dml_data.d_cols = "sharp_d"
_ = RDFlex(tmp_dml_data, ml_g, ml_m, fuzzy=False)
@@ -172,7 +180,7 @@ def test_rdd_exception_resampling():
msg = r"The number of folds must be of int type. \[1\] of type was passed."
with pytest.raises(TypeError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m, n_folds=[1])
- msg = 'The number of folds greater or equal to 2. 1 was passed.'
+ msg = "The number of folds greater or equal to 2. 1 was passed."
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m, n_folds=1)
@@ -180,7 +188,7 @@ def test_rdd_exception_resampling():
msg = r"The number of repetitions for the sample splitting must be of int type. \[0\] of type was passed."
with pytest.raises(TypeError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m, n_rep=[0])
- msg = 'The number of repetitions for the sample splitting has to be positive. 0 was passed.'
+ msg = "The number of repetitions for the sample splitting has to be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m, n_rep=0)
@@ -192,7 +200,7 @@ def test_rdd_exception_kernel():
_ = RDFlex(dml_data, ml_g, ml_m, fs_kernel=2)
msg = r"Invalid kernel 'rbf'. Valid kernels are \['uniform', 'triangular', 'epanechnikov'\]."
with pytest.raises(ValueError, match=msg):
- _ = RDFlex(dml_data, ml_g, ml_m, fs_kernel='rbf')
+ _ = RDFlex(dml_data, ml_g, ml_m, fs_kernel="rbf")
@pytest.mark.ci_rdd
@@ -208,20 +216,24 @@ def test_rdd_exception_fs_specification():
with pytest.raises(TypeError, match=msg):
_ = RDFlex(dml_data, ml_g, ml_m, fs_specification=1)
- msg = ("Invalid fs_specification 'local_constant'. "
- r"Valid specifications are \['cutoff', 'cutoff and score', 'interacted cutoff and score'\].")
+ msg = (
+ "Invalid fs_specification 'local_constant'. "
+ r"Valid specifications are \['cutoff', 'cutoff and score', 'interacted cutoff and score'\]."
+ )
with pytest.raises(ValueError, match=msg):
- _ = RDFlex(dml_data, ml_g, ml_m, fs_specification='local_constant')
+ _ = RDFlex(dml_data, ml_g, ml_m, fs_specification="local_constant")
@pytest.mark.ci_rdd
def test_rdd_exception_fit():
rdd_model = RDFlex(dml_data, ml_g, ml_m)
- msg = (r"The number of iterations for the iterative bandwidth fitting must be of int type. \[0\] of type "
- "was passed.")
+ msg = (
+ r"The number of iterations for the iterative bandwidth fitting must be of int type. \[0\] of type "
+ "was passed."
+ )
with pytest.raises(TypeError, match=msg):
rdd_model.fit(n_iterations=[0])
- msg = 'The number of iterations for the iterative bandwidth fitting has to be positive. 0 was passed.'
+ msg = "The number of iterations for the iterative bandwidth fitting has to be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
rdd_model.fit(n_iterations=0)
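A convention worth noting in the exception tests above: `pytest.raises(..., match=...)` interprets `match` as a regular expression searched in the message, which is why literal brackets and parentheses are escaped (`\[0\]`, `\(\)`). A small sketch with an illustrative error message:

```python
import re

import pytest

def fail():
    raise TypeError("The number of folds must be of int type. [1] was passed.")

def test_match_is_a_regex():
    # Escape regex metacharacters by hand ...
    with pytest.raises(TypeError, match=r"int type\. \[1\] was passed\."):
        fail()
    # ... or let re.escape do it.
    with pytest.raises(TypeError, match=re.escape("[1] was passed.")):
        fail()
```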
diff --git a/doubleml/rdd/tests/test_rdd_fuzzy.py b/doubleml/rdd/tests/test_rdd_fuzzy.py
index 8e41b3c5e..15b7d75ff 100644
--- a/doubleml/rdd/tests/test_rdd_fuzzy.py
+++ b/doubleml/rdd/tests/test_rdd_fuzzy.py
@@ -1,95 +1,87 @@
"""
Dummy test using a fixed learner for fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_data, cutoff):
return rdd_fuzzy_data(cutoff)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_data):
return rdd_fuzzy_data(0.0)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_fuzzy_left.py b/doubleml/rdd/tests/test_rdd_fuzzy_left.py
index f8ca4a3e2..5f239c527 100644
--- a/doubleml/rdd/tests/test_rdd_fuzzy_left.py
+++ b/doubleml/rdd/tests/test_rdd_fuzzy_left.py
@@ -1,95 +1,87 @@
"""
Dummy test using a fixed learner for left-sided fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_left_data, cutoff):
return rdd_fuzzy_left_data(cutoff)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_left_data):
return rdd_fuzzy_left_data(0.0)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_fuzzy_right.py b/doubleml/rdd/tests/test_rdd_fuzzy_right.py
index 627bfde91..7885cf520 100644
--- a/doubleml/rdd/tests/test_rdd_fuzzy_right.py
+++ b/doubleml/rdd/tests/test_rdd_fuzzy_right.py
@@ -1,95 +1,87 @@
"""
Dummy test using a fixed learner for right-sided fuzzy data
"""
-import pytest
+
import numpy as np
+import pytest
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_fuzzy_right_data, cutoff):
return rdd_fuzzy_right_data(cutoff)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_fuzzy_right_data):
return rdd_fuzzy_right_data(0.0)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/rdd/tests/test_rdd_not_installed.py b/doubleml/rdd/tests/test_rdd_not_installed.py
index f8e21ebc2..b8b49cfd1 100644
--- a/doubleml/rdd/tests/test_rdd_not_installed.py
+++ b/doubleml/rdd/tests/test_rdd_not_installed.py
@@ -1,12 +1,13 @@
-import pytest
from unittest.mock import patch
+import pytest
+
import doubleml as dml
@pytest.mark.ci
def test_rdrobust_import_error():
- with patch('doubleml.rdd.rdd.rdrobust', None):
+ with patch("doubleml.rdd.rdd.rdrobust", None):
msg = r"rdrobust is not installed. Please install it using 'pip install DoubleML\[rdd\]'"
with pytest.raises(ImportError, match=msg):
dml.rdd.RDFlex(None, None)
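The test above works because the `rdd` module keeps the optional `rdrobust` import in a module-level binding, which `unittest.mock.patch` can temporarily replace with `None`. A self-contained sketch of the same guard pattern (the `optmod` module here is a stand-in, not DoubleML code):

```python
import types
from unittest.mock import patch

import pytest

# Stand-in module with an optional-dependency binding.
optmod = types.ModuleType("optmod")
optmod.rdrobust = object()  # pretend the optional import succeeded

def construct(mod):
    if mod.rdrobust is None:
        raise ImportError("rdrobust is not installed. Please install it using 'pip install DoubleML[rdd]'")
    return "ok"

def test_simulated_missing_dependency():
    with patch.object(optmod, "rdrobust", None):
        with pytest.raises(ImportError, match=r"pip install DoubleML\[rdd\]"):
            construct(optmod)
    assert construct(optmod) == "ok"  # binding is restored after the context exits
```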
diff --git a/doubleml/rdd/tests/test_rdd_return_types.py b/doubleml/rdd/tests/test_rdd_return_types.py
index b7a98085b..13248afd1 100644
--- a/doubleml/rdd/tests/test_rdd_return_types.py
+++ b/doubleml/rdd/tests/test_rdd_return_types.py
@@ -1,28 +1,27 @@
-import pytest
import numpy as np
import pandas as pd
+import pytest
+from sklearn.linear_model import Lasso, LogisticRegression
import doubleml as dml
from doubleml.rdd import RDFlex
from doubleml.rdd.datasets import make_simple_rdd_data
-from sklearn.linear_model import Lasso, LogisticRegression
-
np.random.seed(3141)
n_obs = 300
data = make_simple_rdd_data(n_obs=n_obs)
df = pd.DataFrame(
- np.column_stack((data['Y'], data['D'], data['score'], data['X'])),
- columns=['y', 'd', 'score'] + ['x' + str(i) for i in range(data['X'].shape[1])]
+ np.column_stack((data["Y"], data["D"], data["score"], data["X"])),
+ columns=["y", "d", "score"] + ["x" + str(i) for i in range(data["X"].shape[1])],
)
-dml_data = dml.DoubleMLData(df, y_col='y', d_cols='d', s_col='score')
+dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", s_col="score")
def _assert_return_types(dml_obj):
assert isinstance(dml_obj.n_folds, int)
assert isinstance(dml_obj.n_rep, int)
- assert (isinstance(dml_obj.cutoff, float) | isinstance(dml_obj.cutoff, int))
+ assert isinstance(dml_obj.cutoff, float) | isinstance(dml_obj.cutoff, int)
assert isinstance(dml_obj.fuzzy, bool)
assert isinstance(dml_obj.fs_kernel, str)
assert isinstance(dml_obj.w, np.ndarray)
@@ -37,7 +36,7 @@ def _assert_return_types_after_fit(dml_obj):
assert isinstance(dml_obj.__str__(), str)
assert isinstance(dml_obj.n_folds, int)
assert isinstance(dml_obj.n_rep, int)
- assert (isinstance(dml_obj.cutoff, float) | isinstance(dml_obj.cutoff, int))
+ assert isinstance(dml_obj.cutoff, float) | isinstance(dml_obj.cutoff, int)
assert isinstance(dml_obj.fuzzy, bool)
assert isinstance(dml_obj.fs_kernel, str)
assert isinstance(dml_obj.w, np.ndarray)
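An aside on the assertions above: after dropping the redundant parentheses, `isinstance(x, float) | isinstance(x, int)` still works because `|` on two `bool`s is a bitwise-or, but passing a tuple of types is the usual idiom:

```python
x = 0.0
assert isinstance(x, float) | isinstance(x, int)  # works: bool | bool
assert isinstance(x, (float, int))                # equivalent and more idiomatic
```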
diff --git a/doubleml/rdd/tests/test_rdd_sharp.py b/doubleml/rdd/tests/test_rdd_sharp.py
index b2b95968d..d4e699e30 100644
--- a/doubleml/rdd/tests/test_rdd_sharp.py
+++ b/doubleml/rdd/tests/test_rdd_sharp.py
@@ -1,95 +1,87 @@
"""
Dummy test using a fixed learner for sharp data
"""
-import pytest
+
import numpy as np
+import pytest
-@pytest.fixture(scope='module',
- params=[-0.2, 0.0, 0.4])
+@pytest.fixture(scope="module", params=[-0.2, 0.0, 0.4])
def cutoff(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1])
+@pytest.fixture(scope="module", params=[0.05, 0.1])
def alpha(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 4])
+@pytest.fixture(scope="module", params=[1, 4])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def p(request):
return request.param
-@pytest.fixture(scope='module',
- params=["cutoff", "cutoff and score", "interacted cutoff and score"])
+@pytest.fixture(scope="module", params=["cutoff", "cutoff and score", "interacted cutoff and score"])
def fs_specification(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data(rdd_sharp_data, cutoff):
return rdd_sharp_data(cutoff)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data_zero(rdd_sharp_data):
return rdd_sharp_data(0.0)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_placebo(predict_dummy, data_zero, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data_zero, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def predict_nonplacebo(predict_dummy, data, cutoff, alpha, p, n_rep, fs_specification):
- return predict_dummy(
- data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification
- )
+ return predict_dummy(data, cutoff=cutoff, alpha=alpha, n_rep=n_rep, p=p, fs_specification=fs_specification)
@pytest.mark.ci_rdd
def test_rdd_placebo_coef(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_coef(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['coef'], reference['coef'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["coef"], reference["coef"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_se(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_se(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['se'], reference['se'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["se"], reference["se"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_placebo_ci(predict_placebo):
reference, actual = predict_placebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci_rdd
def test_rdd_nonplacebo_ci(predict_nonplacebo):
reference, actual = predict_nonplacebo
- assert np.allclose(actual['ci'], reference['ci'], rtol=1e-9, atol=1e-4)
+ assert np.allclose(actual["ci"], reference["ci"], rtol=1e-9, atol=1e-4)
diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py
index 18ceef883..eeeaab3d6 100644
--- a/doubleml/tests/_utils.py
+++ b/doubleml/tests/_utils.py
@@ -1,16 +1,15 @@
import numpy as np
-from sklearn.model_selection import KFold, GridSearchCV, StratifiedKFold
-from sklearn.base import clone
import pandas as pd
from scipy.stats import norm
+from sklearn.base import clone
+from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
-from ..utils._estimation import _var_est, _aggregate_coefs_and_ses
from ..double_ml_data import DoubleMLBaseData
+from ..utils._estimation import _aggregate_coefs_and_ses, _var_est
class DummyDataClass(DoubleMLBaseData):
- def __init__(self,
- data):
+ def __init__(self, data):
DoubleMLBaseData.__init__(self, data)
@property
@@ -22,11 +21,9 @@ def draw_smpls(n_obs, n_folds, n_rep=1, groups=None):
all_smpls = []
for _ in range(n_rep):
if groups is None:
- resampling = KFold(n_splits=n_folds,
- shuffle=True)
+ resampling = KFold(n_splits=n_folds, shuffle=True)
else:
- resampling = StratifiedKFold(n_splits=n_folds,
- shuffle=True)
+ resampling = StratifiedKFold(n_splits=n_folds, shuffle=True)
smpls = [(train, test) for train, test in resampling.split(X=np.zeros(n_obs), y=groups)]
all_smpls.append(smpls)
return all_smpls
@@ -69,8 +66,7 @@ def tune_grid_search(y, x, ml_model, smpls, param_grid, n_folds_tune, train_cond
tune_res = [None] * len(smpls)
for idx, (train_index, _) in enumerate(smpls):
g_tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True)
- g_grid_search = GridSearchCV(ml_model, param_grid,
- cv=g_tune_resampling)
+ g_grid_search = GridSearchCV(ml_model, param_grid, cv=g_tune_resampling)
if train_cond is None:
tune_res[idx] = g_grid_search.fit(x[train_index, :], y[train_index])
else:
@@ -93,17 +89,12 @@ def generate_dml_dict(psi_a, psi_b):
n_thetas = psi_a.shape[1]
n_rep = psi_a.shape[2]
- all_thetas = -1.0*np.mean(psi_b, axis=0)
+ all_thetas = -1.0 * np.mean(psi_b, axis=0)
all_ses = np.zeros(shape=(n_thetas, n_rep))
for i_rep in range(n_rep):
for i_theta in range(n_thetas):
- psi = psi_a[:, i_theta, i_rep]*all_thetas[i_theta, i_rep] + psi_b[:, i_theta, i_rep]
- var_estimate, _ = _var_est(
- psi=psi,
- psi_deriv=psi_a[:, i_theta, i_rep],
- smpls=None,
- is_cluster_data=False
- )
+ psi = psi_a[:, i_theta, i_rep] * all_thetas[i_theta, i_rep] + psi_b[:, i_theta, i_rep]
+ var_estimate, _ = _var_est(psi=psi, psi_deriv=psi_a[:, i_theta, i_rep], smpls=None, is_cluster_data=False)
all_ses[i_theta, i_rep] = np.sqrt(var_estimate)
var_scaling_factors = np.full(n_thetas, n_obs)
@@ -115,20 +106,20 @@ def generate_dml_dict(psi_a, psi_b):
scaled_psi = psi_b / np.mean(psi_a, axis=0)
doubleml_dict = {
- 'thetas': thetas,
- 'ses': ses,
- 'all_thetas': all_thetas,
- 'all_ses': all_ses,
- 'var_scaling_factors': var_scaling_factors,
- 'scaled_psi': scaled_psi,
+ "thetas": thetas,
+ "ses": ses,
+ "all_thetas": all_thetas,
+ "all_ses": all_ses,
+ "var_scaling_factors": var_scaling_factors,
+ "scaled_psi": scaled_psi,
}
return doubleml_dict
def confint_manual(coef, se, index_names, boot_t_stat=None, joint=True, level=0.95):
- a = (1 - level)
- ab = np.array([a / 2, 1. - a / 2])
+ a = 1 - level
+ ab = np.array([a / 2, 1.0 - a / 2])
if joint:
assert boot_t_stat.shape[2] == 1
sim = np.amax(np.abs(boot_t_stat[:, :, 0]), 1)
@@ -138,7 +129,5 @@ def confint_manual(coef, se, index_names, boot_t_stat=None, joint=True, level=0.
fac = norm.ppf(ab)
ci = np.vstack((coef + se * fac[0], coef + se * fac[1])).T
- df_ci = pd.DataFrame(ci,
- columns=['{:.1f} %'.format(i * 100) for i in ab],
- index=index_names)
+ df_ci = pd.DataFrame(ci, columns=["{:.1f} %".format(i * 100) for i in ab], index=index_names)
return df_ci
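For reference, the pointwise branch of `confint_manual` above is the textbook two-sided normal interval, coef ± z_{1-a/2} * se. A worked numeric sketch:

```python
import numpy as np
from scipy.stats import norm

coef, se, level = np.array([0.5]), np.array([0.1]), 0.95
a = 1 - level
fac = norm.ppf([a / 2, 1.0 - a / 2])  # ~ [-1.96, 1.96]
ci = np.vstack((coef + se * fac[0], coef + se * fac[1])).T
print(ci)  # ~ [[0.304, 0.696]]
```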
diff --git a/doubleml/tests/_utils_boot.py b/doubleml/tests/_utils_boot.py
index 191259f39..2d6113d3f 100644
--- a/doubleml/tests/_utils_boot.py
+++ b/doubleml/tests/_utils_boot.py
@@ -2,12 +2,12 @@
def draw_weights(method, n_rep_boot, n_obs):
- if method == 'Bayes':
- weights = np.random.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.
- elif method == 'normal':
+ if method == "Bayes":
+ weights = np.random.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.0
+ elif method == "normal":
weights = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
else:
- assert method == 'wild'
+ assert method == "wild"
xx = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
yy = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
weights = xx / np.sqrt(2) + (np.power(yy, 2) - 1) / 2
@@ -20,11 +20,9 @@ def boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting=True):
for i_rep in range(n_rep):
this_weights = weights[i_rep, :]
if apply_cross_fitting:
- boot_t_stat[i_rep] = np.mean(np.multiply(np.divide(this_weights, se),
- psi / J))
+ boot_t_stat[i_rep] = np.mean(np.multiply(np.divide(this_weights, se), psi / J))
else:
test_index = smpls[0][1]
- boot_t_stat[i_rep] = np.mean(np.multiply(np.divide(this_weights, se),
- psi[test_index] / J))
+ boot_t_stat[i_rep] = np.mean(np.multiply(np.divide(this_weights, se), psi[test_index] / J))
return boot_t_stat
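The three schemes in `draw_weights` above all produce mean-zero, unit-variance multipliers, which is what the score bootstrap needs. A quick sanity-check sketch:

```python
import numpy as np

n_rep_boot, n_obs = 4, 1000
rng = np.random.default_rng(42)

bayes = rng.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.0
normal = rng.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
xx = rng.normal(size=(n_rep_boot, n_obs))
yy = rng.normal(size=(n_rep_boot, n_obs))
wild = xx / np.sqrt(2) + (np.power(yy, 2) - 1) / 2

for w in (bayes, normal, wild):
    # Both moments should be close to (0, 1) up to sampling noise.
    assert abs(w.mean()) < 0.1
    assert abs(w.var() - 1.0) < 0.2
```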
diff --git a/doubleml/tests/_utils_cluster.py b/doubleml/tests/_utils_cluster.py
index 090a165cc..425796cd4 100644
--- a/doubleml/tests/_utils_cluster.py
+++ b/doubleml/tests/_utils_cluster.py
@@ -1,17 +1,15 @@
-import pandas as pd
-import numpy as np
+import itertools
+import numpy as np
+import pandas as pd
from sklearn.model_selection import KFold
-import itertools
class DoubleMLMultiwayResampling:
- def __init__(self,
- n_folds,
- smpl_sizes):
+ def __init__(self, n_folds, smpl_sizes):
self.n_folds = n_folds
self.smpl_sizes = smpl_sizes
- assert len(smpl_sizes), 'For DoubleMLMultiwayResampling mmultiple sample sizes need to be provided'
+        assert len(smpl_sizes), "For DoubleMLMultiwayResampling multiple sample sizes need to be provided"
self.n_ways = len(smpl_sizes)
self.resampling = KFold(n_splits=n_folds, shuffle=True)
@@ -27,28 +25,28 @@ def split_samples(self):
smpls.append([(train, test) for train, test in self.resampling.split(np.zeros(self.smpl_sizes[i_way]))])
smpls_multi_ind = []
- xx = n_ways*[range(self.n_folds)]
+ xx = n_ways * [range(self.n_folds)]
for ind_index_set in itertools.product(*xx):
smpls_train_list = [smpls[i][ind_index_set[i]][0] for i in range(n_ways)]
smpls_test_list = [smpls[i][ind_index_set[i]][1] for i in range(n_ways)]
- smpls_multi_ind.append((pd.MultiIndex.from_product(smpls_train_list).values,
- pd.MultiIndex.from_product(smpls_test_list).values))
+ smpls_multi_ind.append(
+ (pd.MultiIndex.from_product(smpls_train_list).values, pd.MultiIndex.from_product(smpls_test_list).values)
+ )
- smpls_lin_ind = [(multi_to_lin_ind.loc[x[0]].values,
- multi_to_lin_ind.loc[x[1]].values) for x in smpls_multi_ind]
+ smpls_lin_ind = [(multi_to_lin_ind.loc[x[0]].values, multi_to_lin_ind.loc[x[1]].values) for x in smpls_multi_ind]
return smpls_multi_ind, smpls_lin_ind
def est_one_way_cluster_dml2(psi_a, psi_b, cluster_var, smpls_one_split):
- psi_a_subsample = 0.
- psi_b_subsample = 0.
- for (_, test_index) in smpls_one_split:
+ psi_a_subsample = 0.0
+ psi_b_subsample = 0.0
+ for _, test_index in smpls_one_split:
I_k = np.unique(cluster_var[test_index])
- const = 1/len(I_k)
- psi_a_subsample += const*np.sum(psi_a[test_index])
- psi_b_subsample += const*np.sum(psi_b[test_index])
+ const = 1 / len(I_k)
+ psi_a_subsample += const * np.sum(psi_a[test_index])
+ psi_b_subsample += const * np.sum(psi_b[test_index])
theta = -psi_b_subsample / psi_a_subsample
return theta
@@ -56,31 +54,31 @@ def est_one_way_cluster_dml2(psi_a, psi_b, cluster_var, smpls_one_split):
def var_one_way_cluster(psi, psi_a, cluster_var, smpls_one_split):
gamma_hat = 0
j_hat = 0
- for (_, test_index) in smpls_one_split:
+ for _, test_index in smpls_one_split:
I_k = np.unique(cluster_var[test_index])
- const = 1/len(I_k)
+ const = 1 / len(I_k)
for i in I_k:
ind = cluster_var == i
for val_i in psi[ind]:
for val_j in psi[ind]:
gamma_hat += const * val_i * val_j
- j_hat += np.sum(psi_a[test_index])/len(I_k)
+ j_hat += np.sum(psi_a[test_index]) / len(I_k)
n_folds = len(smpls_one_split)
- gamma_hat = gamma_hat/n_folds
- j_hat = j_hat/n_folds
- var = gamma_hat / (j_hat ** 2) / len(np.unique(cluster_var))
+ gamma_hat = gamma_hat / n_folds
+ j_hat = j_hat / n_folds
+ var = gamma_hat / (j_hat**2) / len(np.unique(cluster_var))
return var
def est_two_way_cluster_dml2(psi_a, psi_b, cluster_var1, cluster_var2, smpls_one_split):
- psi_a_subsample = 0.
- psi_b_subsample = 0.
- for (_, test_index) in smpls_one_split:
+ psi_a_subsample = 0.0
+ psi_b_subsample = 0.0
+ for _, test_index in smpls_one_split:
I_k = np.unique(cluster_var1[test_index])
J_l = np.unique(cluster_var2[test_index])
- const = 1/(len(I_k) * len(J_l))
- psi_a_subsample += const*np.sum(psi_a[test_index])
- psi_b_subsample += const*np.sum(psi_b[test_index])
+ const = 1 / (len(I_k) * len(J_l))
+ psi_a_subsample += const * np.sum(psi_a[test_index])
+ psi_b_subsample += const * np.sum(psi_b[test_index])
theta = -psi_b_subsample / psi_a_subsample
return theta
@@ -88,10 +86,10 @@ def est_two_way_cluster_dml2(psi_a, psi_b, cluster_var1, cluster_var2, smpls_one
def var_two_way_cluster(psi, psi_a, cluster_var1, cluster_var2, smpls_one_split):
gamma_hat = 0
j_hat = 0
- for (_, test_index) in smpls_one_split:
+ for _, test_index in smpls_one_split:
I_k = np.unique(cluster_var1[test_index])
J_l = np.unique(cluster_var2[test_index])
- const = min(len(I_k), len(J_l))/(len(I_k)*len(J_l))**2
+ const = min(len(I_k), len(J_l)) / (len(I_k) * len(J_l)) ** 2
for i in I_k:
for j in J_l:
for j_ in J_l:
@@ -104,12 +102,12 @@ def var_two_way_cluster(psi, psi_a, cluster_var1, cluster_var2, smpls_one_split)
ind1 = (cluster_var1 == i) & (cluster_var2 == j)
ind2 = (cluster_var1 == i_) & (cluster_var2 == j)
gamma_hat += const * psi[ind1] * psi[ind2]
- j_hat += np.sum(psi_a[test_index])/(len(I_k)*len(J_l))
+ j_hat += np.sum(psi_a[test_index]) / (len(I_k) * len(J_l))
n_folds = len(smpls_one_split)
- gamma_hat = gamma_hat/n_folds
- j_hat = j_hat/n_folds
+ gamma_hat = gamma_hat / n_folds
+ j_hat = j_hat / n_folds
n_clusters1 = len(np.unique(cluster_var1))
n_clusters2 = len(np.unique(cluster_var2))
var_scaling_factor = min(n_clusters1, n_clusters2)
- var = gamma_hat / (j_hat ** 2) / var_scaling_factor
+ var = gamma_hat / (j_hat**2) / var_scaling_factor
return var
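
The DML2-style aggregation in `est_one_way_cluster_dml2` weights each fold's score sums by the inverse number of clusters appearing in that fold and then solves the linear score psi_a * theta + psi_b = 0. A toy illustration (hypothetical data and fold split, not taken from the tests):

```python
import numpy as np

rng = np.random.default_rng(123)
cluster_var = np.repeat(np.arange(5), 4)       # 5 clusters of size 4
psi_a = -np.ones(20)                           # linear score psi = psi_a * theta + psi_b
psi_b = rng.normal(loc=0.5, scale=0.1, size=20)
smpls_one_split = [(np.arange(10, 20), np.arange(0, 10)),
                   (np.arange(0, 10), np.arange(10, 20))]

psi_a_subsample = 0.0
psi_b_subsample = 0.0
for _, test_index in smpls_one_split:
    I_k = np.unique(cluster_var[test_index])   # clusters hit by this fold
    const = 1 / len(I_k)
    psi_a_subsample += const * np.sum(psi_a[test_index])
    psi_b_subsample += const * np.sum(psi_b[test_index])

theta = -psi_b_subsample / psi_a_subsample     # root of the aggregated score
print(theta)                                   # close to 0.5 for this toy setup
```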
diff --git a/doubleml/tests/_utils_dml_cv_predict.py b/doubleml/tests/_utils_dml_cv_predict.py
index 7691d28b3..8b65854fa 100644
--- a/doubleml/tests/_utils_dml_cv_predict.py
+++ b/doubleml/tests/_utils_dml_cv_predict.py
@@ -1,16 +1,13 @@
import numpy as np
-
import scipy.sparse as sp
from joblib import Parallel, delayed
-
from sklearn.base import clone
-from sklearn.utils.validation import _num_samples
+from sklearn.model_selection._validation import _check_is_permutation, _fit_and_predict
from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection._validation import _fit_and_predict, _check_is_permutation
+from sklearn.utils.validation import _num_samples
-def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
- n_jobs=None, est_params=None, method='predict'):
+def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, n_jobs=None, est_params=None, method="predict"):
# this is an adapted version of the sklearn function cross_val_predict which allows setting fold-specific parameters
# original https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/model_selection/_validation.py
@@ -22,23 +19,19 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
train_index, test_index = smpls[0]
# set some defaults aligned with cross_val_predict
fit_params = None
- if method == 'predict_proba':
+ if method == "predict_proba":
predictions = np.full((len(y), 2), np.nan)
else:
predictions = np.full(len(y), np.nan)
if est_params is None:
- xx = _fit_and_predict(
- clone(estimator),
- x, y, train_index, test_index, fit_params, method)
+ xx = _fit_and_predict(clone(estimator), x, y, train_index, test_index, fit_params, method)
else:
assert isinstance(est_params, dict)
- xx = _fit_and_predict(
- clone(estimator).set_params(**est_params),
- x, y, train_index, test_index, fit_params, method)
+ xx = _fit_and_predict(clone(estimator).set_params(**est_params), x, y, train_index, test_index, fit_params, method)
# the implementation (here and elsewhere) is restricted to a sorted set of test_indices, but this could be fixed
# inv_test_indices = np.argsort(test_indices)
- assert np.all(np.diff(test_indices) > 0), 'test_indices not sorted'
+ assert np.all(np.diff(test_indices) > 0), "test_indices not sorted"
if isinstance(xx, np.ndarray):
# this is sklearn >= 0.24
predictions[test_indices] = xx
@@ -49,36 +42,39 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
# set some defaults aligned with cross_val_predict
fit_params = None
verbose = 0
- pre_dispatch = '2*n_jobs'
+ pre_dispatch = "2*n_jobs"
- encode = (method == 'predict_proba')
+ encode = method == "predict_proba"
if encode:
y = np.asarray(y)
le = LabelEncoder()
y = le.fit_transform(y)
- parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
- pre_dispatch=pre_dispatch)
+ parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
# FixMe: Find a better way to handle the different combinations of parameters and smpls_is_partition
if est_params is None:
- prediction_blocks = parallel(delayed(_fit_and_predict)(
- estimator,
- x, y, train_index, test_index, fit_params, method)
- for idx, (train_index, test_index) in enumerate(smpls))
+ prediction_blocks = parallel(
+ delayed(_fit_and_predict)(estimator, x, y, train_index, test_index, fit_params, method)
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
elif isinstance(est_params, dict):
# if no fold-specific parameters we redirect to the standard method
# warnings.warn("Using the same (hyper-)parameters for all folds")
- prediction_blocks = parallel(delayed(_fit_and_predict)(
- clone(estimator).set_params(**est_params),
- x, y, train_index, test_index, fit_params, method)
- for idx, (train_index, test_index) in enumerate(smpls))
+ prediction_blocks = parallel(
+ delayed(_fit_and_predict)(
+ clone(estimator).set_params(**est_params), x, y, train_index, test_index, fit_params, method
+ )
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
else:
- assert len(est_params) == len(smpls), 'provide one parameter setting per fold'
- prediction_blocks = parallel(delayed(_fit_and_predict)(
- clone(estimator).set_params(**est_params[idx]),
- x, y, train_index, test_index, fit_params, method)
- for idx, (train_index, test_index) in enumerate(smpls))
+ assert len(est_params) == len(smpls), "provide one parameter setting per fold"
+ prediction_blocks = parallel(
+ delayed(_fit_and_predict)(
+ clone(estimator).set_params(**est_params[idx]), x, y, train_index, test_index, fit_params, method
+ )
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
# Concatenate the predictions
if isinstance(prediction_blocks[0], np.ndarray):
@@ -88,7 +84,7 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
predictions = [pred_block_i for pred_block_i, _ in prediction_blocks]
if not _check_is_permutation(test_indices, _num_samples(x)):
- raise ValueError('_dml_cross_val_predict only works for partitions')
+ raise ValueError("_dml_cross_val_predict only works for partitions")
inv_test_indices = np.empty(len(test_indices), dtype=int)
inv_test_indices[test_indices] = np.arange(len(test_indices))
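
The helper above extends `cross_val_predict` with fold-specific hyperparameters: `est_params` may be `None`, a single dict reused for every fold, or a list with one dict per fold. The per-fold branch boils down to the following pattern (a self-contained sketch with hypothetical data, not a call into the private helper):

```python
import numpy as np
from sklearn.base import clone
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 5))
y = x @ np.ones(5) + rng.normal(size=100)

smpls = list(KFold(n_splits=4, shuffle=True, random_state=0).split(x))
est_params = [{"alpha": a} for a in (0.1, 0.5, 1.0, 2.0)]  # one setting per fold

predictions = np.full(len(y), np.nan)
for (train_index, test_index), params in zip(smpls, est_params):
    model = clone(Lasso()).set_params(**params)  # fold-specific hyperparameters
    model.fit(x[train_index], y[train_index])
    predictions[test_index] = model.predict(x[test_index])

assert not np.isnan(predictions).any()  # the folds partition the sample
```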
diff --git a/doubleml/tests/_utils_doubleml_sensitivity_manual.py b/doubleml/tests/_utils_doubleml_sensitivity_manual.py
index c20e2e4bc..e8969a5e2 100644
--- a/doubleml/tests/_utils_doubleml_sensitivity_manual.py
+++ b/doubleml/tests/_utils_doubleml_sensitivity_manual.py
@@ -1,21 +1,21 @@
+import copy
+
import numpy as np
import pandas as pd
from scipy.stats import norm
-import copy
from ..utils._estimation import _aggregate_coefs_and_ses
def doubleml_sensitivity_manual(sensitivity_elements, all_coefs, psi, psi_deriv, cf_y, cf_d, rho, level):
-
# specify the parameters
- sigma2 = sensitivity_elements['sigma2']
- nu2 = sensitivity_elements['nu2']
- psi_sigma = sensitivity_elements['psi_sigma2']
- psi_nu = sensitivity_elements['psi_nu2']
+ sigma2 = sensitivity_elements["sigma2"]
+ nu2 = sensitivity_elements["nu2"]
+ psi_sigma = sensitivity_elements["psi_sigma2"]
+ psi_nu = sensitivity_elements["psi_nu2"]
psi_scaled = np.divide(psi, np.mean(psi_deriv, axis=0))
- confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0-cf_d))))
+ confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0 - cf_d))))
S = np.sqrt(np.multiply(sigma2, nu2))
all_theta_lower = all_coefs - np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength)
@@ -42,18 +42,13 @@ def doubleml_sensitivity_manual(sensitivity_elements, all_coefs, psi, psi_deriv,
ci_lower = np.median(all_ci_lower, axis=1)
ci_upper = np.median(all_ci_upper, axis=1)
- theta_dict = {'lower': theta_lower,
- 'upper': theta_upper}
+ theta_dict = {"lower": theta_lower, "upper": theta_upper}
- se_dict = {'lower': sigma_lower,
- 'upper': sigma_upper}
+ se_dict = {"lower": sigma_lower, "upper": sigma_upper}
- ci_dict = {'lower': ci_lower,
- 'upper': ci_upper}
+ ci_dict = {"lower": ci_lower, "upper": ci_upper}
- res_dict = {'theta': theta_dict,
- 'se': se_dict,
- 'ci': ci_dict}
+ res_dict = {"theta": theta_dict, "se": se_dict, "ci": ci_dict}
return res_dict
@@ -67,10 +62,10 @@ def doubleml_sensitivity_benchmark_manual(dml_obj, benchmarking_set):
dml_short.fit()
var_y = np.var(dml_obj._dml_data.y)
- var_y_long = np.squeeze(dml_obj.sensitivity_elements['sigma2'], axis=0)
- nu2_long = np.squeeze(dml_obj.sensitivity_elements['nu2'], axis=0)
- var_y_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0)
- nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0)
+ var_y_long = np.squeeze(dml_obj.sensitivity_elements["sigma2"], axis=0)
+ nu2_long = np.squeeze(dml_obj.sensitivity_elements["nu2"], axis=0)
+ var_y_short = np.squeeze(dml_short.sensitivity_elements["sigma2"], axis=0)
+ nu2_short = np.squeeze(dml_short.sensitivity_elements["nu2"], axis=0)
R2_y_long = 1.0 - var_y_long / var_y
R2_y_short = 1.0 - var_y_short / var_y
@@ -88,15 +83,15 @@ def doubleml_sensitivity_benchmark_manual(dml_obj, benchmarking_set):
var_g = var_y_short - var_y_long
var_riesz = nu2_long - nu2_short
denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0))
- all_rho_benchmark = np.sign(all_delta_theta) * \
- np.clip(np.divide(np.absolute(all_delta_theta), denom, out=np.ones_like(all_delta_theta), where=denom != 0),
- 0, 1)
+ all_rho_benchmark = np.sign(all_delta_theta) * np.clip(
+ np.divide(np.absolute(all_delta_theta), denom, out=np.ones_like(all_delta_theta), where=denom != 0), 0, 1
+ )
rho_benchmark = np.median(all_rho_benchmark, axis=0)
benchmark_dict = {
- 'cf_y': cf_y_benchmark,
- 'cf_d': cf_d_benchmark,
- 'rho': rho_benchmark,
- 'delta_theta': delta_theta,
+ "cf_y": cf_y_benchmark,
+ "cf_d": cf_d_benchmark,
+ "rho": rho_benchmark,
+ "delta_theta": delta_theta,
}
return pd.DataFrame(benchmark_dict, index=dml_obj._dml_data.d_cols)
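
In `doubleml_sensitivity_manual` the bias bound scales a confounding strength |rho| * sqrt(cf_y * cf_d / (1 - cf_d)) by S = sqrt(sigma2 * nu2), shifting the point estimate up and down. A hand-computed sketch with purely illustrative numbers:

```python
import numpy as np

theta_hat = 0.5
sigma2, nu2 = 1.2, 0.8        # illustrative variance-type sensitivity elements
cf_y, cf_d, rho = 0.03, 0.04, 1.0

confounding_strength = np.abs(rho) * np.sqrt(cf_y * cf_d / (1.0 - cf_d))
S = np.sqrt(sigma2 * nu2)

theta_lower = theta_hat - S * confounding_strength  # mirrors all_theta_lower above
theta_upper = theta_hat + S * confounding_strength
print(f"[{theta_lower:.4f}, {theta_upper:.4f}]")
```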
diff --git a/doubleml/tests/conftest.py b/doubleml/tests/conftest.py
index 328a1211f..248697b8f 100644
--- a/doubleml/tests/conftest.py
+++ b/doubleml/tests/conftest.py
@@ -1,22 +1,17 @@
import numpy as np
import pandas as pd
-
import pytest
-
-from sklearn.datasets import make_spd_matrix
-from sklearn.datasets import make_regression, make_classification
-
-from doubleml.datasets import make_plr_turrell2018, make_irm_data, \
- make_pliv_CHS2015
+from sklearn.datasets import make_classification, make_regression, make_spd_matrix
from doubleml import DoubleMLData
+from doubleml.datasets import make_irm_data, make_pliv_CHS2015, make_plr_turrell2018
def _g(x):
return np.power(np.sin(x), 2)
-def _m(x, nu=0., gamma=1.):
+def _m(x, nu=0.0, gamma=1.0):
return 0.5 / np.pi * (np.sinh(gamma)) / (np.cosh(gamma) - np.cos(x - nu))
@@ -24,8 +19,7 @@ def _m2(x):
return np.power(x, 2)
-@pytest.fixture(scope='session',
- params=[(500, 5)])
+@pytest.fixture(scope="session", params=[(500, 5)])
def generate_data_simple(request):
n_p = request.param
np.random.seed(1111)
@@ -39,18 +33,14 @@ def generate_data_simple(request):
D2 = 1.0 * (np.random.uniform(size=n) > 0.5)
X = np.random.normal(size=(n, p))
Y = theta * D1 + np.dot(X, np.ones(p)) + np.random.normal(size=n)
- df = pd.DataFrame(np.column_stack((X, Y, D1, D2)),
- columns=[f'X{i + 1}' for i in np.arange(p)] + ['Y', 'D1', 'D2'])
- data_d1 = DoubleMLData(df, 'Y', 'D1')
- data_d2 = DoubleMLData(df, 'Y', 'D2')
+ df = pd.DataFrame(np.column_stack((X, Y, D1, D2)), columns=[f"X{i + 1}" for i in np.arange(p)] + ["Y", "D1", "D2"])
+ data_d1 = DoubleMLData(df, "Y", "D1")
+ data_d2 = DoubleMLData(df, "Y", "D2")
return data_d1, data_d2
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20),
- (1000, 100)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20), (1000, 100)])
def generate_data1(request):
n_p = request.param
np.random.seed(1111)
@@ -65,9 +55,7 @@ def generate_data1(request):
return data
-@pytest.fixture(scope='session',
- params=[(500, 10),
- (1000, 20)])
+@pytest.fixture(scope="session", params=[(500, 10), (1000, 20)])
def generate_data_irm_w_missings(request):
n_p = request.param
np.random.seed(1111)
@@ -77,19 +65,17 @@ def generate_data_irm_w_missings(request):
theta = 0.5
# generating data
- (x, y, d) = make_irm_data(n, p, theta, return_type='array')
+ (x, y, d) = make_irm_data(n, p, theta, return_type="array")
# randomly set some entries to np.nan
- ind = np.random.choice(np.arange(x.size), replace=False,
- size=int(x.size * 0.05))
+ ind = np.random.choice(np.arange(x.size), replace=False, size=int(x.size * 0.05))
x[np.unravel_index(ind, x.shape)] = np.nan
data = (x, y, d)
return data
-@pytest.fixture(scope='session',
- params=[(1000, 20)])
+@pytest.fixture(scope="session", params=[(1000, 20)])
def generate_data_iv(request):
n_p = request.param
np.random.seed(1111)
@@ -104,9 +90,7 @@ def generate_data_iv(request):
return data
-@pytest.fixture(scope='session',
- params=[(253, 10, False), (501, 52, False),
- (253, 10, True), (501, 52, True)])
+@pytest.fixture(scope="session", params=[(253, 10, False), (501, 52, False), (253, 10, True), (501, 52, True)])
def generate_data_cv_predict(request):
np.random.seed(3141)
# setting parameters
@@ -125,8 +109,7 @@ def generate_data_cv_predict(request):
return data
-@pytest.fixture(scope='session',
- params=[(1000, 20)])
+@pytest.fixture(scope="session", params=[(1000, 20)])
def generate_data_bivariate(request):
n_p = request.param
np.random.seed(1111)
@@ -138,17 +121,38 @@ def generate_data_bivariate(request):
sigma = make_spd_matrix(p)
# generating data
- x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
+ x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n])
G = _g(np.dot(x, b))
M0 = _m(np.dot(x, b))
M1 = _m2(np.dot(x, b))
- D0 = M0 + np.random.standard_normal(size=[n, ])
- D1 = M1 + np.random.standard_normal(size=[n, ])
- y = theta[0] * D0 + theta[1] * D1 + G + np.random.standard_normal(size=[n, ])
+ D0 = M0 + np.random.standard_normal(size=[n])
+ D1 = M1 + np.random.standard_normal(size=[n])
+ y = theta[0] * D0 + theta[1] * D1 + G + np.random.standard_normal(size=[n])
d = np.column_stack((D0, D1))
- column_names = [f'X{i + 1}' for i in np.arange(p)] + ['y'] + \
- [f'd{i + 1}' for i in np.arange(2)]
- data = pd.DataFrame(np.column_stack((x, y, d)),
- columns=column_names)
+ column_names = [f"X{i + 1}" for i in np.arange(p)] + ["y"] + [f"d{i + 1}" for i in np.arange(2)]
+ data = pd.DataFrame(np.column_stack((x, y, d)), columns=column_names)
return data
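
The fixtures above rely on pytest parametrization: a session-scoped fixture with `params` is instantiated once per parameter tuple, and every dependent test runs once per instantiation. A stripped-down, self-contained sketch of the pattern:

```python
import numpy as np
import pytest

@pytest.fixture(scope="session", params=[(500, 10), (1000, 20)])
def generate_toy_data(request):
    n, p = request.param        # one fixture instance per (n, p) tuple
    np.random.seed(1111)
    return np.random.normal(size=(n, p))

def test_shape(generate_toy_data):  # collected once per parameter tuple
    assert generate_toy_data.ndim == 2
```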
diff --git a/doubleml/tests/test_cv_predict.py b/doubleml/tests/test_cv_predict.py
index bafe1713f..9887c88cf 100644
--- a/doubleml/tests/test_cv_predict.py
+++ b/doubleml/tests/test_cv_predict.py
@@ -1,40 +1,36 @@
import numpy as np
import pytest
-
+from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.model_selection import KFold, train_test_split
-from sklearn.linear_model import Lasso, LogisticRegression
+from doubleml.utils._estimation import _dml_cv_predict
from ._utils_dml_cv_predict import _dml_cv_predict_ut_version
-from doubleml.utils._estimation import _dml_cv_predict
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def cross_fit(request):
return request.param
-@pytest.fixture(scope='module',
- params=[None, 'global', 'per_fold'])
+@pytest.fixture(scope="module", params=[None, "global", "per_fold"])
def params(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def cv_predict_fixture(generate_data_cv_predict, cross_fit, params):
n_folds = 4
# collect data
(x, y, classifier) = generate_data_cv_predict
if classifier:
- method = 'predict_proba'
+ method = "predict_proba"
else:
- method = 'predict'
+ method = "predict"
if cross_fit:
- smpls = [(train, test) for train, test in KFold(n_splits=n_folds,
- shuffle=True).split(x)]
+ smpls = [(train, test) for train, test in KFold(n_splits=n_folds, shuffle=True).split(x)]
else:
n_obs = len(y)
smpls = train_test_split(np.arange(n_obs), test_size=0.23)
@@ -42,44 +38,41 @@ def cv_predict_fixture(generate_data_cv_predict, cross_fit, params):
if params is None:
est_params = None
- elif params == 'global':
- if method == 'predict_proba':
- est_params = {'C': 0.5}
+ elif params == "global":
+ if method == "predict_proba":
+ est_params = {"C": 0.5}
else:
- est_params = {'alpha': 0.5}
+ est_params = {"alpha": 0.5}
else:
- assert params == 'per_fold'
- if method == 'predict_proba':
+ assert params == "per_fold"
+ if method == "predict_proba":
if cross_fit:
- est_params = [{'C': np.random.uniform()} for i in range(n_folds)]
+ est_params = [{"C": np.random.uniform()} for i in range(n_folds)]
else:
- est_params = {'C': 1.}
+ est_params = {"C": 1.0}
else:
if cross_fit:
- est_params = [{'alpha': np.random.uniform()} for i in range(n_folds)]
+ est_params = [{"alpha": np.random.uniform()} for i in range(n_folds)]
else:
- est_params = {'alpha': 1.}
+ est_params = {"alpha": 1.0}
- if method == 'predict_proba':
- preds = _dml_cv_predict(LogisticRegression(), x, y, smpls,
- est_params=est_params, method=method)
- preds_ut = _dml_cv_predict_ut_version(LogisticRegression(), x, y, smpls,
- est_params=est_params, method=method)[:, 1]
+ if method == "predict_proba":
+ preds = _dml_cv_predict(LogisticRegression(), x, y, smpls, est_params=est_params, method=method)
+ preds_ut = _dml_cv_predict_ut_version(LogisticRegression(), x, y, smpls, est_params=est_params, method=method)[:, 1]
else:
preds = _dml_cv_predict(Lasso(), x, y, smpls, est_params=est_params, method=method)
preds_ut = _dml_cv_predict_ut_version(Lasso(), x, y, smpls, est_params=est_params, method=method)
- res_dict = {'preds': preds['preds'],
- 'preds_ut': preds_ut}
+ res_dict = {"preds": preds["preds"], "preds_ut": preds_ut}
return res_dict
@pytest.mark.ci
def test_cv_predict(cv_predict_fixture):
- ind_nan_preds = np.isnan(cv_predict_fixture['preds'])
- ind_nan_preds_ut = np.isnan(cv_predict_fixture['preds_ut'])
+ ind_nan_preds = np.isnan(cv_predict_fixture["preds"])
+ ind_nan_preds_ut = np.isnan(cv_predict_fixture["preds_ut"])
assert np.array_equal(ind_nan_preds, ind_nan_preds_ut)
- assert np.allclose(cv_predict_fixture['preds'][~ind_nan_preds],
- cv_predict_fixture['preds_ut'][~ind_nan_preds],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ cv_predict_fixture["preds"][~ind_nan_preds], cv_predict_fixture["preds_ut"][~ind_nan_preds], rtol=1e-9, atol=1e-4
+ )
diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py
index 80f71d0e6..050e4f9af 100644
--- a/doubleml/tests/test_datasets.py
+++ b/doubleml/tests/test_datasets.py
@@ -1,45 +1,58 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
+import pytest
-from doubleml import DoubleMLData, DoubleMLClusterData
-from doubleml.datasets import fetch_401K, fetch_bonus, make_plr_CCDDHNR2018, make_plr_turrell2018, \
- make_irm_data, make_iivm_data, _make_pliv_data, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, \
- make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data, make_heterogeneous_data, make_ssm_data, \
- make_irm_data_discrete_treatments
-
-msg_inv_return_type = 'Invalid return_type.'
+from doubleml import DoubleMLClusterData, DoubleMLData
+from doubleml.datasets import (
+ _make_pliv_data,
+ fetch_401K,
+ fetch_bonus,
+ make_confounded_irm_data,
+ make_confounded_plr_data,
+ make_did_SZ2020,
+ make_heterogeneous_data,
+ make_iivm_data,
+ make_irm_data,
+ make_irm_data_discrete_treatments,
+ make_pliv_CHS2015,
+ make_pliv_multiway_cluster_CKMS2021,
+ make_plr_CCDDHNR2018,
+ make_plr_turrell2018,
+ make_ssm_data,
+)
+
+msg_inv_return_type = "Invalid return_type."
def test_fetch_401K_return_types():
- res = fetch_401K('DoubleMLData')
+ res = fetch_401K("DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = fetch_401K('DataFrame')
+ res = fetch_401K("DataFrame")
assert isinstance(res, pd.DataFrame)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = fetch_401K('matrix')
+ _ = fetch_401K("matrix")
def test_fetch_401K_poly():
- msg = 'polynomial_features os not implemented yet for fetch_401K.'
+ msg = "polynomial_features os not implemented yet for fetch_401K."
with pytest.raises(NotImplementedError, match=msg):
_ = fetch_401K(polynomial_features=True)
def test_fetch_bonus_return_types():
- res = fetch_bonus('DoubleMLData')
+ res = fetch_bonus("DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = fetch_bonus('DataFrame')
+ res = fetch_bonus("DataFrame")
assert isinstance(res, pd.DataFrame)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = fetch_bonus('matrix')
+ _ = fetch_bonus("matrix")
def test_fetch_bonus_poly():
data_bonus_wo_poly = fetch_bonus(polynomial_features=False)
n_x = len(data_bonus_wo_poly.x_cols)
data_bonus_w_poly = fetch_bonus(polynomial_features=True)
- assert len(data_bonus_w_poly.x_cols) == ((n_x+1) * n_x / 2 + n_x)
+ assert len(data_bonus_w_poly.x_cols) == ((n_x + 1) * n_x / 2 + n_x)
@pytest.mark.ci
@@ -54,112 +67,110 @@ def test_make_plr_CCDDHNR2018_return_types():
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_plr_CCDDHNR2018(n_obs=100, return_type='matrix')
+ _ = make_plr_CCDDHNR2018(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_plr_turrell2018_return_types():
np.random.seed(3141)
- res = make_plr_turrell2018(n_obs=100, return_type='DoubleMLData')
+ res = make_plr_turrell2018(n_obs=100, return_type="DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = make_plr_turrell2018(n_obs=100, return_type='DataFrame')
+ res = make_plr_turrell2018(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d = make_plr_turrell2018(n_obs=100, return_type='array')
+ x, y, d = make_plr_turrell2018(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_plr_turrell2018(n_obs=100, return_type='matrix')
+ _ = make_plr_turrell2018(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_irm_data_return_types():
np.random.seed(3141)
- res = make_irm_data(n_obs=100, return_type='DoubleMLData')
+ res = make_irm_data(n_obs=100, return_type="DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = make_irm_data(n_obs=100, return_type='DataFrame')
+ res = make_irm_data(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d = make_irm_data(n_obs=100, return_type='array')
+ x, y, d = make_irm_data(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_irm_data(n_obs=100, return_type='matrix')
+ _ = make_irm_data(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_iivm_data_return_types():
np.random.seed(3141)
- res = make_iivm_data(n_obs=100, return_type='DoubleMLData')
+ res = make_iivm_data(n_obs=100, return_type="DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = make_iivm_data(n_obs=100, return_type='DataFrame')
+ res = make_iivm_data(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d, z = make_iivm_data(n_obs=100, return_type='array')
+ x, y, d, z = make_iivm_data(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
assert isinstance(z, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_iivm_data(n_obs=100, return_type='matrix')
+ _ = make_iivm_data(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_pliv_data_return_types():
np.random.seed(3141)
- res = _make_pliv_data(n_obs=100, return_type='DoubleMLData')
+ res = _make_pliv_data(n_obs=100, return_type="DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = _make_pliv_data(n_obs=100, return_type='DataFrame')
+ res = _make_pliv_data(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d, z = _make_pliv_data(n_obs=100, return_type='array')
+ x, y, d, z = _make_pliv_data(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
assert isinstance(z, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = _make_pliv_data(n_obs=100, return_type='matrix')
+ _ = _make_pliv_data(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_pliv_CHS2015_return_types():
np.random.seed(3141)
- res = make_pliv_CHS2015(n_obs=100, return_type='DoubleMLData')
+ res = make_pliv_CHS2015(n_obs=100, return_type="DoubleMLData")
assert isinstance(res, DoubleMLData)
- res = make_pliv_CHS2015(n_obs=100, return_type='DataFrame')
+ res = make_pliv_CHS2015(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type='array')
+ x, y, d, z = make_pliv_CHS2015(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
assert isinstance(z, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_pliv_CHS2015(n_obs=100, return_type='matrix')
+ _ = make_pliv_CHS2015(n_obs=100, return_type="matrix")
@pytest.mark.ci
def test_make_pliv_multiway_cluster_CKMS2021_return_types():
np.random.seed(3141)
- res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type='DoubleMLClusterData')
+ res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DoubleMLClusterData")
assert isinstance(res, DoubleMLClusterData)
- res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type='DataFrame')
+ res = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type='array')
+ x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
assert isinstance(cluster_vars, np.ndarray)
assert isinstance(z, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type='matrix')
+ _ = make_pliv_multiway_cluster_CKMS2021(N=10, M=10, return_type="matrix")
-@pytest.fixture(scope='function',
- params=[False, True])
+@pytest.fixture(scope="function", params=[False, True])
def cross_sectional(request):
return request.param
-@pytest.fixture(scope='function',
- params=[1, 2, 3, 4, 5, 6])
+@pytest.fixture(scope="function", params=[1, 2, 3, 4, 5, 6])
def dgp_type(request):
return request.param
@@ -172,8 +183,9 @@ def test_make_did_SZ2020_return_types(cross_sectional, dgp_type):
res = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=pd.DataFrame)
assert isinstance(res, pd.DataFrame)
if cross_sectional:
- x, y, d, t = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional,
- return_type=np.ndarray)
+ x, y, d, t = make_did_SZ2020(
+ n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=np.ndarray
+ )
assert isinstance(t, np.ndarray)
else:
x, y, d = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type=np.ndarray)
@@ -181,14 +193,13 @@ def test_make_did_SZ2020_return_types(cross_sectional, dgp_type):
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type='matrix')
- msg = 'The dgp_type is not valid.'
+ _ = make_did_SZ2020(n_obs=100, dgp_type=dgp_type, cross_sectional_data=cross_sectional, return_type="matrix")
+ msg = "The dgp_type is not valid."
with pytest.raises(ValueError, match=msg):
- _ = make_did_SZ2020(n_obs=100, dgp_type="5", cross_sectional_data=cross_sectional, return_type='matrix')
+ _ = make_did_SZ2020(n_obs=100, dgp_type="5", cross_sectional_data=cross_sectional, return_type="matrix")
-@pytest.fixture(scope='function',
- params=[True, False])
+@pytest.fixture(scope="function", params=[True, False])
def linear(request):
return request.param
@@ -198,26 +209,26 @@ def test_make_confounded_irm_data_return_types(linear):
np.random.seed(3141)
res = make_confounded_irm_data(linear=linear)
assert isinstance(res, dict)
- assert isinstance(res['x'], np.ndarray)
- assert isinstance(res['y'], np.ndarray)
- assert isinstance(res['d'], np.ndarray)
-
- assert isinstance(res['oracle_values'], dict)
- assert isinstance(res['oracle_values']['g_long'], np.ndarray)
- assert isinstance(res['oracle_values']['g_short'], np.ndarray)
- assert isinstance(res['oracle_values']['m_long'], np.ndarray)
- assert isinstance(res['oracle_values']['m_short'], np.ndarray)
- assert isinstance(res['oracle_values']['gamma_a'], float)
- assert isinstance(res['oracle_values']['beta_a'], float)
- assert isinstance(res['oracle_values']['a'], np.ndarray)
- assert isinstance(res['oracle_values']['y_0'], np.ndarray)
- assert isinstance(res['oracle_values']['y_1'], np.ndarray)
- assert isinstance(res['oracle_values']['z'], np.ndarray)
- assert isinstance(res['oracle_values']['cf_y'], float)
- assert isinstance(res['oracle_values']['cf_d_ate'], float)
- assert isinstance(res['oracle_values']['cf_d_atte'], float)
- assert isinstance(res['oracle_values']['rho_ate'], float)
- assert isinstance(res['oracle_values']['rho_atte'], float)
+ assert isinstance(res["x"], np.ndarray)
+ assert isinstance(res["y"], np.ndarray)
+ assert isinstance(res["d"], np.ndarray)
+
+ assert isinstance(res["oracle_values"], dict)
+ assert isinstance(res["oracle_values"]["g_long"], np.ndarray)
+ assert isinstance(res["oracle_values"]["g_short"], np.ndarray)
+ assert isinstance(res["oracle_values"]["m_long"], np.ndarray)
+ assert isinstance(res["oracle_values"]["m_short"], np.ndarray)
+ assert isinstance(res["oracle_values"]["gamma_a"], float)
+ assert isinstance(res["oracle_values"]["beta_a"], float)
+ assert isinstance(res["oracle_values"]["a"], np.ndarray)
+ assert isinstance(res["oracle_values"]["y_0"], np.ndarray)
+ assert isinstance(res["oracle_values"]["y_1"], np.ndarray)
+ assert isinstance(res["oracle_values"]["z"], np.ndarray)
+ assert isinstance(res["oracle_values"]["cf_y"], float)
+ assert isinstance(res["oracle_values"]["cf_d_ate"], float)
+ assert isinstance(res["oracle_values"]["cf_d_atte"], float)
+ assert isinstance(res["oracle_values"]["rho_ate"], float)
+ assert isinstance(res["oracle_values"]["rho_atte"], float)
@pytest.mark.ci
@@ -225,30 +236,28 @@ def test_make_confounded_plr_data_return_types():
np.random.seed(3141)
res = make_confounded_plr_data(theta=5.0)
assert isinstance(res, dict)
- assert isinstance(res['x'], np.ndarray)
- assert isinstance(res['y'], np.ndarray)
- assert isinstance(res['d'], np.ndarray)
-
- assert isinstance(res['oracle_values'], dict)
- assert isinstance(res['oracle_values']['g_long'], np.ndarray)
- assert isinstance(res['oracle_values']['g_short'], np.ndarray)
- assert isinstance(res['oracle_values']['m_long'], np.ndarray)
- assert isinstance(res['oracle_values']['m_short'], np.ndarray)
- assert isinstance(res['oracle_values']['theta'], float)
- assert isinstance(res['oracle_values']['gamma_a'], float)
- assert isinstance(res['oracle_values']['beta_a'], float)
- assert isinstance(res['oracle_values']['a'], np.ndarray)
- assert isinstance(res['oracle_values']['z'], np.ndarray)
-
-
-@pytest.fixture(scope='function',
- params=[False, True])
+ assert isinstance(res["x"], np.ndarray)
+ assert isinstance(res["y"], np.ndarray)
+ assert isinstance(res["d"], np.ndarray)
+
+ assert isinstance(res["oracle_values"], dict)
+ assert isinstance(res["oracle_values"]["g_long"], np.ndarray)
+ assert isinstance(res["oracle_values"]["g_short"], np.ndarray)
+ assert isinstance(res["oracle_values"]["m_long"], np.ndarray)
+ assert isinstance(res["oracle_values"]["m_short"], np.ndarray)
+ assert isinstance(res["oracle_values"]["theta"], float)
+ assert isinstance(res["oracle_values"]["gamma_a"], float)
+ assert isinstance(res["oracle_values"]["beta_a"], float)
+ assert isinstance(res["oracle_values"]["a"], np.ndarray)
+ assert isinstance(res["oracle_values"]["z"], np.ndarray)
+
+
+@pytest.fixture(scope="function", params=[False, True])
def binary_treatment(request):
return request.param
-@pytest.fixture(scope='function',
- params=[1, 2])
+@pytest.fixture(scope="function", params=[1, 2])
def n_x(request):
return request.param
@@ -258,18 +267,18 @@ def test_make_heterogeneous_data_return_types(binary_treatment, n_x):
np.random.seed(3141)
res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment)
assert isinstance(res, dict)
- assert isinstance(res['data'], pd.DataFrame)
- assert isinstance(res['effects'], np.ndarray)
- assert callable(res['treatment_effect'])
+ assert isinstance(res["data"], pd.DataFrame)
+ assert isinstance(res["effects"], np.ndarray)
+ assert callable(res["treatment_effect"])
# test input checks
- msg = 'n_x must be either 1 or 2.'
+ msg = "n_x must be either 1 or 2."
with pytest.raises(AssertionError, match=msg):
_ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment)
- msg = 'support_size must be smaller than p.'
+ msg = "support_size must be smaller than p."
with pytest.raises(AssertionError, match=msg):
_ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment)
- msg = 'binary_treatment must be a boolean.'
+ msg = "binary_treatment must be a boolean."
with pytest.raises(AssertionError, match=msg):
_ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2)
@@ -279,20 +288,19 @@ def test_make_ssm_data_return_types():
np.random.seed(3141)
res = make_ssm_data(n_obs=100)
assert isinstance(res, DoubleMLData)
- res = make_ssm_data(n_obs=100, return_type='DataFrame')
+ res = make_ssm_data(n_obs=100, return_type="DataFrame")
assert isinstance(res, pd.DataFrame)
- x, y, d, z, s = make_ssm_data(n_obs=100, return_type='array')
+ x, y, d, z, s = make_ssm_data(n_obs=100, return_type="array")
assert isinstance(x, np.ndarray)
assert isinstance(y, np.ndarray)
assert isinstance(d, np.ndarray)
assert isinstance(z, np.ndarray)
assert isinstance(s, np.ndarray)
with pytest.raises(ValueError, match=msg_inv_return_type):
- _ = make_ssm_data(n_obs=100, return_type='matrix')
+ _ = make_ssm_data(n_obs=100, return_type="matrix")
-@pytest.fixture(scope='function',
- params=[3, 5])
+@pytest.fixture(scope="function", params=[3, 5])
def n_levels(request):
return request.param
@@ -302,21 +310,21 @@ def test_make_data_discrete_treatments(n_levels):
n = 100
data_apo = make_irm_data_discrete_treatments(n_obs=n, n_levels=3)
assert isinstance(data_apo, dict)
- assert isinstance(data_apo['y'], np.ndarray)
- assert isinstance(data_apo['d'], np.ndarray)
- assert isinstance(data_apo['x'], np.ndarray)
- assert isinstance(data_apo['oracle_values'], dict)
-
- assert isinstance(data_apo['oracle_values']['cont_d'], np.ndarray)
- assert isinstance(data_apo['oracle_values']['level_bounds'], np.ndarray)
- assert isinstance(data_apo['oracle_values']['potential_level'], np.ndarray)
- assert isinstance(data_apo['oracle_values']['ite'], np.ndarray)
- assert isinstance(data_apo['oracle_values']['y0'], np.ndarray)
-
- msg = 'n_levels must be at least 2.'
+ assert isinstance(data_apo["y"], np.ndarray)
+ assert isinstance(data_apo["d"], np.ndarray)
+ assert isinstance(data_apo["x"], np.ndarray)
+ assert isinstance(data_apo["oracle_values"], dict)
+
+ assert isinstance(data_apo["oracle_values"]["cont_d"], np.ndarray)
+ assert isinstance(data_apo["oracle_values"]["level_bounds"], np.ndarray)
+ assert isinstance(data_apo["oracle_values"]["potential_level"], np.ndarray)
+ assert isinstance(data_apo["oracle_values"]["ite"], np.ndarray)
+ assert isinstance(data_apo["oracle_values"]["y0"], np.ndarray)
+
+ msg = "n_levels must be at least 2."
with pytest.raises(ValueError, match=msg):
_ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1)
- msg = 'n_levels must be an integer.'
+ msg = "n_levels must be an integer."
with pytest.raises(ValueError, match=msg):
_ = make_irm_data_discrete_treatments(n_obs=n, n_levels=1.1)
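
The count asserted in `test_fetch_bonus_poly` above is the degree-2 expansion without bias: n_x linear terms plus n_x(n_x + 1)/2 squares and pairwise interactions. A quick check, assuming the expansion behaves like scikit-learn's `PolynomialFeatures` (an assumption for illustration; `fetch_bonus` itself is not called here):

```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

n_x = 7
x = np.zeros((1, n_x))
n_features = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x).shape[1]
assert n_features == (n_x + 1) * n_x / 2 + n_x  # 7 linear + 28 quadratic = 35
```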
diff --git a/doubleml/tests/test_dml_data.py b/doubleml/tests/test_dml_data.py
index f9575d56a..ef5371d28 100644
--- a/doubleml/tests/test_dml_data.py
+++ b/doubleml/tests/test_dml_data.py
@@ -1,15 +1,19 @@
-import pytest
import numpy as np
import pandas as pd
+import pytest
+from sklearn.linear_model import Lasso, LogisticRegression
-from doubleml import DoubleMLData, DoubleMLPLR, DoubleMLClusterData, DoubleMLDIDCS, \
- DoubleMLSSM
-from doubleml.datasets import make_plr_CCDDHNR2018, _make_pliv_data, make_pliv_CHS2015, \
- make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020, make_ssm_data
+from doubleml import DoubleMLClusterData, DoubleMLData, DoubleMLDIDCS, DoubleMLPLR, DoubleMLSSM
+from doubleml.datasets import (
+ _make_pliv_data,
+ make_did_SZ2020,
+ make_pliv_CHS2015,
+ make_pliv_multiway_cluster_CKMS2021,
+ make_plr_CCDDHNR2018,
+ make_ssm_data,
+)
from doubleml.double_ml_data import DoubleMLBaseData
-from sklearn.linear_model import Lasso, LogisticRegression
-
class DummyDataClass(DoubleMLBaseData):
def __init__(self, data):
@@ -23,7 +27,7 @@ def n_coefs(self):
@pytest.mark.ci
def test_doubleml_basedata():
dummy_dml_data = DummyDataClass(pd.DataFrame(np.zeros((100, 10))))
- assert dummy_dml_data.d_cols[0] == 'theta'
+ assert dummy_dml_data.d_cols[0] == "theta"
assert dummy_dml_data.n_treat == 1
assert dummy_dml_data.n_coefs == 1
@@ -32,116 +36,117 @@ def test_doubleml_basedata():
def dml_data_fixture(generate_data1):
data = generate_data1
np.random.seed(3141)
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
- obj_from_np = DoubleMLData.from_arrays(data.loc[:, x_cols].values,
- data['y'].values, data['d'].values)
+ obj_from_np = DoubleMLData.from_arrays(data.loc[:, x_cols].values, data["y"].values, data["d"].values)
- obj_from_pd = DoubleMLData(data, 'y', ['d'], x_cols)
+ obj_from_pd = DoubleMLData(data, "y", ["d"], x_cols)
- return {'obj_from_np': obj_from_np,
- 'obj_from_pd': obj_from_pd}
+ return {"obj_from_np": obj_from_np, "obj_from_pd": obj_from_pd}
@pytest.mark.ci
def test_dml_data_x(dml_data_fixture):
- assert np.allclose(dml_data_fixture['obj_from_np'].x,
- dml_data_fixture['obj_from_pd'].x,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_data_fixture["obj_from_np"].x, dml_data_fixture["obj_from_pd"].x, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_data_y(dml_data_fixture):
- assert np.allclose(dml_data_fixture['obj_from_np'].y,
- dml_data_fixture['obj_from_pd'].y,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_data_fixture["obj_from_np"].y, dml_data_fixture["obj_from_pd"].y, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_data_d(dml_data_fixture):
- assert np.allclose(dml_data_fixture['obj_from_np'].d,
- dml_data_fixture['obj_from_pd'].d,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_data_fixture["obj_from_np"].d, dml_data_fixture["obj_from_pd"].d, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_obj_vs_from_arrays():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
- dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols]
+ )
assert dml_data_from_array.data.equals(dml_data.data)
dml_data = _make_pliv_data(n_obs=100)
- dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols],
- dml_data.data[dml_data.z_cols])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ dml_data.data[dml_data.x_cols],
+ dml_data.data[dml_data.y_col],
+ dml_data.data[dml_data.d_cols],
+ dml_data.data[dml_data.z_cols],
+ )
assert dml_data_from_array.data.equals(dml_data.data)
dml_data = make_pliv_CHS2015(n_obs=100, dim_z=5)
- dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols],
- dml_data.data[dml_data.z_cols])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ dml_data.data[dml_data.x_cols],
+ dml_data.data[dml_data.y_col],
+ dml_data.data[dml_data.d_cols],
+ dml_data.data[dml_data.z_cols],
+ )
assert np.array_equal(dml_data_from_array.data, dml_data.data)  # z_cols names differ
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i+1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
- dml_data = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i+1}' for i in np.arange(7)])
- dml_data_from_array = DoubleMLData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols])
+ df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]
+ dml_data = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(7)])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ dml_data.data[dml_data.x_cols], dml_data.data[dml_data.y_col], dml_data.data[dml_data.d_cols]
+ )
assert np.array_equal(dml_data_from_array.data, dml_data.data)
dml_data = make_did_SZ2020(n_obs=100, cross_sectional_data=False)
- dml_data_from_array = DoubleMLData.from_arrays(x=dml_data.data[dml_data.x_cols],
- y=dml_data.data[dml_data.y_col],
- d=dml_data.data[dml_data.d_cols])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ x=dml_data.data[dml_data.x_cols], y=dml_data.data[dml_data.y_col], d=dml_data.data[dml_data.d_cols]
+ )
assert np.array_equal(dml_data_from_array.data, dml_data.data)
dml_data = make_did_SZ2020(n_obs=100, cross_sectional_data=True)
- dml_data_from_array = DoubleMLData.from_arrays(x=dml_data.data[dml_data.x_cols],
- y=dml_data.data[dml_data.y_col],
- d=dml_data.data[dml_data.d_cols],
- t=dml_data.data[dml_data.t_col])
+ dml_data_from_array = DoubleMLData.from_arrays(
+ x=dml_data.data[dml_data.x_cols],
+ y=dml_data.data[dml_data.y_col],
+ d=dml_data.data[dml_data.d_cols],
+ t=dml_data.data[dml_data.t_col],
+ )
assert np.array_equal(dml_data_from_array.data, dml_data.data)
# check with instrument and time variable
dml_data = make_did_SZ2020(n_obs=100, cross_sectional_data=True)
- dml_data.data['z'] = dml_data.data['t']
- dml_data_from_array = DoubleMLData.from_arrays(x=dml_data.data[dml_data.x_cols],
- y=dml_data.data[dml_data.y_col],
- d=dml_data.data[dml_data.d_cols],
- z=dml_data.data['z'],
- t=dml_data.data[dml_data.t_col])
+ dml_data.data["z"] = dml_data.data["t"]
+ dml_data_from_array = DoubleMLData.from_arrays(
+ x=dml_data.data[dml_data.x_cols],
+ y=dml_data.data[dml_data.y_col],
+ d=dml_data.data[dml_data.d_cols],
+ z=dml_data.data["z"],
+ t=dml_data.data[dml_data.t_col],
+ )
assert np.array_equal(dml_data_from_array.data, dml_data.data)
dml_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
- dml_data_from_array = DoubleMLClusterData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols],
- dml_data.data[dml_data.cluster_cols],
- dml_data.data[dml_data.z_cols])
+ dml_data_from_array = DoubleMLClusterData.from_arrays(
+ dml_data.data[dml_data.x_cols],
+ dml_data.data[dml_data.y_col],
+ dml_data.data[dml_data.d_cols],
+ dml_data.data[dml_data.cluster_cols],
+ dml_data.data[dml_data.z_cols],
+ )
df = dml_data.data.copy()
- df.rename(columns={'cluster_var_i': 'cluster_var1',
- 'cluster_var_j': 'cluster_var2',
- 'Y': 'y', 'D': 'd', 'Z': 'z'},
- inplace=True)
+ df.rename(
+ columns={"cluster_var_i": "cluster_var1", "cluster_var_j": "cluster_var2", "Y": "y", "D": "d", "Z": "z"}, inplace=True
+ )
assert dml_data_from_array.data.equals(df)
# with a single cluster variable
- dml_data_from_array = DoubleMLClusterData.from_arrays(dml_data.data[dml_data.x_cols],
- dml_data.data[dml_data.y_col],
- dml_data.data[dml_data.d_cols],
- dml_data.data[dml_data.cluster_cols[1]],
- dml_data.data[dml_data.z_cols])
- df = dml_data.data.copy().drop(columns='cluster_var_i')
- df.rename(columns={'cluster_var_j': 'cluster_var',
- 'Y': 'y', 'D': 'd', 'Z': 'z'},
- inplace=True)
+ dml_data_from_array = DoubleMLClusterData.from_arrays(
+ dml_data.data[dml_data.x_cols],
+ dml_data.data[dml_data.y_col],
+ dml_data.data[dml_data.d_cols],
+ dml_data.data[dml_data.cluster_cols[1]],
+ dml_data.data[dml_data.z_cols],
+ )
+ df = dml_data.data.copy().drop(columns="cluster_var_i")
+ df.rename(columns={"cluster_var_j": "cluster_var", "Y": "y", "D": "d", "Z": "z"}, inplace=True)
assert dml_data_from_array.data.equals(df)
@@ -149,9 +154,9 @@ def test_obj_vs_from_arrays():
def test_add_vars_in_df():
# additional variables in the df shouldn't affect results
np.random.seed(3141)
- df = make_plr_CCDDHNR2018(n_obs=100, return_type='DataFrame')
- dml_data_full_df = DoubleMLData(df, 'y', 'd', ['X1', 'X11', 'X13'])
- dml_data_subset = DoubleMLData(df[['X1', 'X11', 'X13'] + ['y', 'd']], 'y', 'd', ['X1', 'X11', 'X13'])
+ df = make_plr_CCDDHNR2018(n_obs=100, return_type="DataFrame")
+ dml_data_full_df = DoubleMLData(df, "y", "d", ["X1", "X11", "X13"])
+ dml_data_subset = DoubleMLData(df[["X1", "X11", "X13"] + ["y", "d"]], "y", "d", ["X1", "X11", "X13"])
dml_plr_full_df = DoubleMLPLR(dml_data_full_df, Lasso(), Lasso())
dml_plr_subset = DoubleMLPLR(dml_data_subset, Lasso(), Lasso(), draw_sample_splitting=False)
dml_plr_subset.set_sample_splitting(dml_plr_full_df.smpls)
@@ -169,7 +174,7 @@ def test_dml_data_no_instr_no_time_no_selection():
assert dml_data.n_instr == 0
assert dml_data.t is None
- x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type='array')
+ x, y, d = make_plr_CCDDHNR2018(n_obs=100, return_type="array")
dml_data = DoubleMLData.from_arrays(x, y, d)
assert dml_data.z is None
assert dml_data.n_instr == 0
@@ -203,110 +208,90 @@ def test_dml_summary_with_selection():
@pytest.mark.ci
def test_x_cols_setter_defaults():
- df = pd.DataFrame(np.tile(np.arange(4), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(4), (4, 1)), columns=["yy", "dd", "xx1", "xx2"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument
- df = pd.DataFrame(np.tile(np.arange(5), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', z_cols='zz')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(5), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# without instrument with time
- df = pd.DataFrame(np.tile(np.arange(5), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'tt'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', t_col='tt')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(5), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "tt"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", t_col="tt")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument with time
- df = pd.DataFrame(np.tile(np.arange(6), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'tt'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', z_cols='zz', t_col='tt')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", t_col="tt")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# without instrument with selection
- df = pd.DataFrame(np.tile(np.arange(5), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'ss'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(5), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "ss"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument with selection
- df = pd.DataFrame(np.tile(np.arange(6), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'ss'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', z_cols='zz', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with selection and time
- df = pd.DataFrame(np.tile(np.arange(6), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'tt', 'ss'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', t_col='tt', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", t_col="tt", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument, selection and time
- df = pd.DataFrame(np.tile(np.arange(7), (4, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'tt', 'ss'])
- dml_data = DoubleMLData(df, y_col='yy', d_cols='dd', z_cols='zz', t_col='tt', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(7), (4, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss"])
+ dml_data = DoubleMLData(df, y_col="yy", d_cols="dd", z_cols="zz", t_col="tt", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
@pytest.mark.ci
def test_x_cols_setter_defaults_w_cluster():
- df = pd.DataFrame(np.tile(np.arange(6), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'xx3', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1')
- assert dml_data.x_cols == ['xx1', 'xx2', 'xx3']
- dml_data.x_cols = ['xx1', 'xx3']
- assert dml_data.x_cols == ['xx1', 'xx3']
+ df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "xx3", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1")
+ assert dml_data.x_cols == ["xx1", "xx2", "xx3"]
+ dml_data.x_cols = ["xx1", "xx3"]
+ assert dml_data.x_cols == ["xx1", "xx3"]
dml_data.x_cols = None
- assert dml_data.x_cols == ['xx1', 'xx2', 'xx3']
+ assert dml_data.x_cols == ["xx1", "xx2", "xx3"]
# with instrument
- df = pd.DataFrame(np.tile(np.arange(6), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'z', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1', z_cols='z')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "z", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="z")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# without instrument and with time
- df = pd.DataFrame(np.tile(np.arange(6), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'tt', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1', t_col='tt')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument and with time
- df = pd.DataFrame(np.tile(np.arange(7), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'tt', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1',
- z_cols='zz', t_col='tt')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# without instrument and with selection
- df = pd.DataFrame(np.tile(np.arange(6), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'ss', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "ss", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument and with selection
- df = pd.DataFrame(np.tile(np.arange(7), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'ss', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1',
- z_cols='zz', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "ss", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# without instrument with time with selection
- df = pd.DataFrame(np.tile(np.arange(7), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'tt', 'ss', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1', t_col='tt',
- s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(7), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "tt", "ss", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", t_col="tt", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
# with instrument with time with selection
- df = pd.DataFrame(np.tile(np.arange(8), (6, 1)),
- columns=['yy', 'dd', 'xx1', 'xx2', 'zz', 'tt', 'ss', 'cluster1'])
- dml_data = DoubleMLClusterData(df, y_col='yy', d_cols='dd', cluster_cols='cluster1',
- z_cols='zz', t_col='tt', s_col='ss')
- assert dml_data.x_cols == ['xx1', 'xx2']
+ df = pd.DataFrame(np.tile(np.arange(8), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "tt", "ss", "cluster1"])
+ dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz", t_col="tt", s_col="ss")
+ assert dml_data.x_cols == ["xx1", "xx2"]
@pytest.mark.ci
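> Editor's note on the hunk above: the assertions pin down the default-covariate rule — with `x_cols=None`, every column not claimed as outcome, treatment, instrument, time, selection, or cluster variable becomes a covariate. A minimal sketch mirroring the constructor calls exercised above (same toy frame, only the public API already shown in this hunk):

```python
import numpy as np
import pandas as pd

from doubleml import DoubleMLClusterData

# same toy frame as in the tests: six identical rows, six named columns
df = pd.DataFrame(np.tile(np.arange(6), (6, 1)), columns=["yy", "dd", "xx1", "xx2", "zz", "cluster1"])

# zz is claimed as instrument and cluster1 as cluster variable, so with
# x_cols left at None only xx1 and xx2 remain as covariates
dml_data = DoubleMLClusterData(df, y_col="yy", d_cols="dd", cluster_cols="cluster1", z_cols="zz")
print(dml_data.x_cols)  # ['xx1', 'xx2']
```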
@@ -316,22 +301,21 @@ def test_x_cols_setter():
orig_x_cols = dml_data.x_cols
# check that after changing the x_cols, the x array gets updated
- x_comp = dml_data.data[['X1', 'X11', 'X13']].values
- dml_data.x_cols = ['X1', 'X11', 'X13']
+ x_comp = dml_data.data[["X1", "X11", "X13"]].values
+ dml_data.x_cols = ["X1", "X11", "X13"]
assert np.array_equal(dml_data.x, x_comp)
- msg = 'Invalid covariates x_cols. At least one covariate is no data column.'
+ msg = "Invalid covariates x_cols. At least one covariate is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.x_cols = ['X1', 'X11', 'A13']
+ dml_data.x_cols = ["X1", "X11", "A13"]
- msg = (r'The covariates x_cols must be of str or list type \(or None\). '
- "5 of type was passed.")
+ msg = r"The covariates x_cols must be of str or list type \(or None\). " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.x_cols = 5
# check single covariate
- x_comp = dml_data.data[['X13']].values
- dml_data.x_cols = 'X13'
+ x_comp = dml_data.data[["X13"]].values
+ dml_data.x_cols = "X13"
assert np.array_equal(dml_data.x, x_comp)
# check setting None brings us back to orig_x_cols
@@ -345,28 +329,27 @@ def test_d_cols_setter():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i + 1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
- dml_data = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i + 1}' for i in np.arange(7)])
+ df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]
+ dml_data = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(7)])
# check that after changing d_cols, the d array gets updated
- d_comp = dml_data.data['d2'].values
- dml_data.d_cols = ['d2', 'd1']
+ d_comp = dml_data.data["d2"].values
+ dml_data.d_cols = ["d2", "d1"]
assert np.array_equal(dml_data.d, d_comp)
- msg = r'Invalid treatment variable\(s\) d_cols. At least one treatment variable is no data column.'
+ msg = r"Invalid treatment variable\(s\) d_cols. At least one treatment variable is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.d_cols = ['d1', 'd13']
+ dml_data.d_cols = ["d1", "d13"]
with pytest.raises(ValueError, match=msg):
- dml_data.d_cols = 'd13'
+ dml_data.d_cols = "d13"
- msg = (r'The treatment variable\(s\) d_cols must be of str or list type. '
- "5 of type was passed.")
+ msg = r"The treatment variable\(s\) d_cols must be of str or list type. " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.d_cols = 5
# check single treatment variable
- d_comp = dml_data.data['d2'].values
- dml_data.d_cols = 'd2'
+ d_comp = dml_data.data["d2"].values
+ dml_data.d_cols = "d2"
assert np.array_equal(dml_data.d, d_comp)
assert dml_data.n_treat == 1
@@ -376,30 +359,30 @@ def test_z_cols_setter():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i + 1}' for i in np.arange(4)] + [f'z{i + 1}' for i in np.arange(3)] + ['y', 'd1', 'd2']
- dml_data = DoubleMLData(df, 'y', ['d1', 'd2'],
- [f'X{i + 1}' for i in np.arange(4)],
- [f'z{i + 1}' for i in np.arange(3)])
+ df.columns = [f"X{i + 1}" for i in np.arange(4)] + [f"z{i + 1}" for i in np.arange(3)] + ["y", "d1", "d2"]
+ dml_data = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(4)], [f"z{i + 1}" for i in np.arange(3)])
# check that after changing z_cols, the z array gets updated
- z_comp = dml_data.data[['z1', 'z2']].values
- dml_data.z_cols = ['z1', 'z2']
+ z_comp = dml_data.data[["z1", "z2"]].values
+ dml_data.z_cols = ["z1", "z2"]
assert np.array_equal(dml_data.z, z_comp)
- msg = r'Invalid instrumental variable\(s\) z_cols. At least one instrumental variable is no data column.'
+ msg = r"Invalid instrumental variable\(s\) z_cols. At least one instrumental variable is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.z_cols = ['z1', 'a13']
+ dml_data.z_cols = ["z1", "a13"]
with pytest.raises(ValueError, match=msg):
- dml_data.z_cols = 'a13'
+ dml_data.z_cols = "a13"
- msg = (r'The instrumental variable\(s\) z_cols must be of str or list type \(or None\). '
- "5 of type was passed.")
+ msg = (
+ r"The instrumental variable\(s\) z_cols must be of str or list type \(or None\). "
+ "5 of type was passed."
+ )
with pytest.raises(TypeError, match=msg):
dml_data.z_cols = 5
# check single instrument
- z_comp = dml_data.data[['z2']].values
- dml_data.z_cols = 'z2'
+ z_comp = dml_data.data[["z2"]].values
+ dml_data.z_cols = "z2"
assert np.array_equal(dml_data.z, z_comp)
# check None
@@ -412,22 +395,19 @@ def test_z_cols_setter():
def test_t_col_setter():
np.random.seed(3141)
df = make_did_SZ2020(n_obs=100, cross_sectional_data=True, return_type=pd.DataFrame)
- df['t_new'] = np.ones(shape=(100,))
- dml_data = DoubleMLData(df, 'y', 'd',
- [f'Z{i + 1}' for i in np.arange(4)],
- t_col='t')
+ df["t_new"] = np.ones(shape=(100,))
+ dml_data = DoubleMLData(df, "y", "d", [f"Z{i + 1}" for i in np.arange(4)], t_col="t")
# check that after changing t_col, the t array gets updated
- t_comp = dml_data.data['t_new'].values
- dml_data.t_col = 't_new'
+ t_comp = dml_data.data["t_new"].values
+ dml_data.t_col = "t_new"
assert np.array_equal(dml_data.t, t_comp)
- msg = r'Invalid time variable t_col. a13 is no data column.'
+ msg = r"Invalid time variable t_col. a13 is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.t_col = 'a13'
+ dml_data.t_col = "a13"
- msg = (r'The time variable t_col must be of str type \(or None\). '
- "5 of type was passed.")
+ msg = r"The time variable t_col must be of str type \(or None\). " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.t_col = 5
@@ -440,22 +420,19 @@ def test_t_col_setter():
def test_s_col_setter():
np.random.seed(3141)
df = make_ssm_data(n_obs=100, return_type=pd.DataFrame)
- df['s_new'] = np.ones(shape=(100,))
- dml_data = DoubleMLData(df, 'y', 'd',
- [f'X{i + 1}' for i in np.arange(4)],
- s_col='s')
+ df["s_new"] = np.ones(shape=(100,))
+ dml_data = DoubleMLData(df, "y", "d", [f"X{i + 1}" for i in np.arange(4)], s_col="s")
# check that after changing s_col, the s array gets updated
- s_comp = dml_data.data['s_new'].values
- dml_data.s_col = 's_new'
+ s_comp = dml_data.data["s_new"].values
+ dml_data.s_col = "s_new"
assert np.array_equal(dml_data.s, s_comp)
- msg = r'Invalid score or selection variable s_col. a13 is no data column.'
+ msg = r"Invalid score or selection variable s_col. a13 is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.s_col = 'a13'
+ dml_data.s_col = "a13"
- msg = (r'The score or selection variable s_col must be of str type \(or None\). '
- "5 of type was passed.")
+ msg = r"The score or selection variable s_col must be of str type \(or None\). " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.s_col = 5
@@ -469,34 +446,33 @@ def test_cluster_cols_setter():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i + 1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
- dml_data = DoubleMLClusterData(df, 'y', ['d1', 'd2'],
- cluster_cols=[f'X{i + 1}' for i in [5, 6]],
- x_cols=[f'X{i + 1}' for i in np.arange(5)])
+ df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]
+ dml_data = DoubleMLClusterData(
+ df, "y", ["d1", "d2"], cluster_cols=[f"X{i + 1}" for i in [5, 6]], x_cols=[f"X{i + 1}" for i in np.arange(5)]
+ )
- cluster_vars = df[['X6', 'X7']].values
+ cluster_vars = df[["X6", "X7"]].values
assert np.array_equal(dml_data.cluster_vars, cluster_vars)
assert dml_data.n_cluster_vars == 2
# check that after changing cluster_cols, the cluster_vars array gets updated
- cluster_vars = df[['X7', 'X6']].values
- dml_data.cluster_cols = ['X7', 'X6']
+ cluster_vars = df[["X7", "X6"]].values
+ dml_data.cluster_cols = ["X7", "X6"]
assert np.array_equal(dml_data.cluster_vars, cluster_vars)
- msg = r'Invalid cluster variable\(s\) cluster_cols. At least one cluster variable is no data column.'
+ msg = r"Invalid cluster variable\(s\) cluster_cols. At least one cluster variable is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.cluster_cols = ['X6', 'X13']
+ dml_data.cluster_cols = ["X6", "X13"]
with pytest.raises(ValueError, match=msg):
- dml_data.cluster_cols = 'X13'
+ dml_data.cluster_cols = "X13"
- msg = (r'The cluster variable\(s\) cluster_cols must be of str or list type. '
- "5 of type was passed.")
+ msg = r"The cluster variable\(s\) cluster_cols must be of str or list type. " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.cluster_cols = 5
# check single cluster variable
- cluster_vars = df[['X7']].values
- dml_data.cluster_cols = 'X7'
+ cluster_vars = df[["X7"]].values
+ dml_data.cluster_cols = "X7"
assert np.array_equal(dml_data.cluster_vars, cluster_vars)
assert dml_data.n_cluster_vars == 1
@@ -506,20 +482,19 @@ def test_y_col_setter():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i + 1}' for i in np.arange(7)] + ['y', 'y123', 'd']
- dml_data = DoubleMLData(df, 'y', 'd', [f'X{i + 1}' for i in np.arange(7)])
+ df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "y123", "d"]
+ dml_data = DoubleMLData(df, "y", "d", [f"X{i + 1}" for i in np.arange(7)])
# check that after changing y_col, the y array gets updated
- y_comp = dml_data.data['y123'].values
- dml_data.y_col = 'y123'
+ y_comp = dml_data.data["y123"].values
+ dml_data.y_col = "y123"
assert np.array_equal(dml_data.y, y_comp)
- msg = r'Invalid outcome variable y_col. d13 is no data column.'
+ msg = r"Invalid outcome variable y_col. d13 is no data column."
with pytest.raises(ValueError, match=msg):
- dml_data.y_col = 'd13'
+ dml_data.y_col = "d13"
- msg = (r'The outcome variable y_col must be of str type. '
- "5 of type was passed.")
+ msg = r"The outcome variable y_col must be of str type. " "5 of type was passed."
with pytest.raises(TypeError, match=msg):
dml_data.y_col = 5
@@ -529,122 +504,123 @@ def test_use_other_treat_as_covariate():
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=100)
df = dml_data.data.copy().iloc[:, :10]
- df.columns = [f'X{i + 1}' for i in np.arange(7)] + ['y', 'd1', 'd2']
- dml_data = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i + 1}' for i in np.arange(7)],
- use_other_treat_as_covariate=True)
- dml_data.set_x_d('d1')
- assert np.array_equal(dml_data.d, df['d1'].values)
- assert np.array_equal(dml_data.x, df[[f'X{i + 1}' for i in np.arange(7)] + ['d2']].values)
- dml_data.set_x_d('d2')
- assert np.array_equal(dml_data.d, df['d2'].values)
- assert np.array_equal(dml_data.x, df[[f'X{i + 1}' for i in np.arange(7)] + ['d1']].values)
-
- dml_data = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i + 1}' for i in np.arange(7)],
- use_other_treat_as_covariate=False)
- dml_data.set_x_d('d1')
- assert np.array_equal(dml_data.d, df['d1'].values)
- assert np.array_equal(dml_data.x, df[[f'X{i + 1}' for i in np.arange(7)]].values)
- dml_data.set_x_d('d2')
- assert np.array_equal(dml_data.d, df['d2'].values)
- assert np.array_equal(dml_data.x, df[[f'X{i + 1}' for i in np.arange(7)]].values)
+ df.columns = [f"X{i + 1}" for i in np.arange(7)] + ["y", "d1", "d2"]
+ dml_data = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(7)], use_other_treat_as_covariate=True)
+ dml_data.set_x_d("d1")
+ assert np.array_equal(dml_data.d, df["d1"].values)
+ assert np.array_equal(dml_data.x, df[[f"X{i + 1}" for i in np.arange(7)] + ["d2"]].values)
+ dml_data.set_x_d("d2")
+ assert np.array_equal(dml_data.d, df["d2"].values)
+ assert np.array_equal(dml_data.x, df[[f"X{i + 1}" for i in np.arange(7)] + ["d1"]].values)
+
+ dml_data = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(7)], use_other_treat_as_covariate=False)
+ dml_data.set_x_d("d1")
+ assert np.array_equal(dml_data.d, df["d1"].values)
+ assert np.array_equal(dml_data.x, df[[f"X{i + 1}" for i in np.arange(7)]].values)
+ dml_data.set_x_d("d2")
+ assert np.array_equal(dml_data.d, df["d2"].values)
+ assert np.array_equal(dml_data.x, df[[f"X{i + 1}" for i in np.arange(7)]].values)
dml_data.use_other_treat_as_covariate = True
- assert np.array_equal(dml_data.d, df['d1'].values)
- assert np.array_equal(dml_data.x, df[[f'X{i + 1}' for i in np.arange(7)] + ['d2']].values)
+ assert np.array_equal(dml_data.d, df["d1"].values)
+ assert np.array_equal(dml_data.x, df[[f"X{i + 1}" for i in np.arange(7)] + ["d2"]].values)
- msg = 'use_other_treat_as_covariate must be True or False. Got 1.'
+ msg = "use_other_treat_as_covariate must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLData(df, 'y', ['d1', 'd2'], [f'X{i + 1}' for i in np.arange(7)],
- use_other_treat_as_covariate=1)
+ _ = DoubleMLData(df, "y", ["d1", "d2"], [f"X{i + 1}" for i in np.arange(7)], use_other_treat_as_covariate=1)
- msg = 'Invalid treatment_var. d3 is not in d_cols.'
+ msg = "Invalid treatment_var. d3 is not in d_cols."
with pytest.raises(ValueError, match=msg):
- dml_data.set_x_d('d3')
+ dml_data.set_x_d("d3")
msg = r"treatment_var must be of str type. \['d1', 'd2'\] of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_data.set_x_d(['d1', 'd2'])
+ dml_data.set_x_d(["d1", "d2"])
@pytest.mark.ci
def test_disjoint_sets():
np.random.seed(3141)
- df = pd.DataFrame(np.tile(np.arange(6), (4, 1)),
- columns=['yy', 'dd1', 'xx1', 'xx2', 'zz', 'tt'])
+ df = pd.DataFrame(np.tile(np.arange(6), (4, 1)), columns=["yy", "dd1", "xx1", "xx2", "zz", "tt"])
- msg = (r'At least one variable/column is set as treatment variable \(``d_cols``\) and as covariate\(``x_cols``\). '
- 'Consider using parameter ``use_other_treat_as_covariate``.')
+ msg = (
+ r"At least one variable/column is set as treatment variable \(``d_cols``\) and as covariate\(``x_cols``\). "
+ "Consider using parameter ``use_other_treat_as_covariate``."
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1', 'xx1'], x_cols=['xx1', 'xx2'])
- msg = 'yy cannot be set as outcome variable ``y_col`` and treatment variable in ``d_cols``'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1", "xx1"], x_cols=["xx1", "xx2"])
+ msg = "yy cannot be set as outcome variable ``y_col`` and treatment variable in ``d_cols``"
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1', 'yy'], x_cols=['xx1', 'xx2'])
- msg = 'yy cannot be set as outcome variable ``y_col`` and covariate in ``x_cols``'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1", "yy"], x_cols=["xx1", "xx2"])
+ msg = "yy cannot be set as outcome variable ``y_col`` and covariate in ``x_cols``"
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'yy', 'xx2'])
- msg = 'yy cannot be set as outcome variable ``y_col`` and instrumental variable in ``z_cols``'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "yy", "xx2"])
+ msg = "yy cannot be set as outcome variable ``y_col`` and instrumental variable in ``z_cols``"
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], z_cols='yy')
- msg = (r'At least one variable/column is set as treatment variable \(``d_cols``\) and instrumental variable in '
- '``z_cols``.')
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="yy")
+ msg = (
+ r"At least one variable/column is set as treatment variable \(``d_cols``\) and instrumental variable in " "``z_cols``."
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], z_cols=['dd1'])
- msg = (r'At least one variable/column is set as covariate \(``x_cols``\) and instrumental variable in '
- '``z_cols``.')
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols=["dd1"])
+ msg = r"At least one variable/column is set as covariate \(``x_cols``\) and instrumental variable in " "``z_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], z_cols='xx2')
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="xx2")
- msg = 'xx2 cannot be set as time variable ``t_col`` and covariate in ``x_cols``.'
+ msg = "xx2 cannot be set as time variable ``t_col`` and covariate in ``x_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], t_col='xx2')
- msg = 'dd1 cannot be set as time variable ``t_col`` and treatment variable in ``d_cols``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="xx2")
+ msg = "dd1 cannot be set as time variable ``t_col`` and treatment variable in ``d_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], t_col='dd1')
- msg = 'yy cannot be set as time variable ``t_col`` and outcome variable ``y_col``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="dd1")
+ msg = "yy cannot be set as time variable ``t_col`` and outcome variable ``y_col``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], t_col='yy')
- msg = 'zz cannot be set as time variable ``t_col`` and instrumental variable in ``z_cols``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="yy")
+ msg = "zz cannot be set as time variable ``t_col`` and instrumental variable in ``z_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], z_cols='zz', t_col='zz')
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", t_col="zz")
- msg = 'xx2 cannot be set as score or selection variable ``s_col`` and covariate in ``x_cols``.'
+ msg = "xx2 cannot be set as score or selection variable ``s_col`` and covariate in ``x_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], s_col='xx2')
- msg = 'dd1 cannot be set as score or selection variable ``s_col`` and treatment variable in ``d_cols``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="xx2")
+ msg = "dd1 cannot be set as score or selection variable ``s_col`` and treatment variable in ``d_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], s_col='dd1')
- msg = 'yy cannot be set as score or selection variable ``s_col`` and outcome variable ``y_col``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="dd1")
+ msg = "yy cannot be set as score or selection variable ``s_col`` and outcome variable ``y_col``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], s_col='yy')
- msg = 'zz cannot be set as score or selection variable ``s_col`` and instrumental variable in ``z_cols``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], s_col="yy")
+ msg = "zz cannot be set as score or selection variable ``s_col`` and instrumental variable in ``z_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], z_cols='zz', s_col='zz')
- msg = 'tt cannot be set as score or selection variable ``s_col`` and time variable ``t_col``.'
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], z_cols="zz", s_col="zz")
+ msg = "tt cannot be set as score or selection variable ``s_col`` and time variable ``t_col``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], t_col='tt', s_col='tt')
+ _ = DoubleMLData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], t_col="tt", s_col="tt")
# cluster data
- msg = 'yy cannot be set as outcome variable ``y_col`` and cluster variable in ``cluster_cols``'
+ msg = "yy cannot be set as outcome variable ``y_col`` and cluster variable in ``cluster_cols``"
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], cluster_cols='yy')
- msg = (r'At least one variable/column is set as treatment variable \(``d_cols``\) and cluster variable in '
- '``cluster_cols``.')
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="yy")
+ msg = (
+ r"At least one variable/column is set as treatment variable \(``d_cols``\) and cluster variable in "
+ "``cluster_cols``."
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], cluster_cols='dd1')
- msg = (r'At least one variable/column is set as covariate \(``x_cols``\) and cluster variable in '
- '``cluster_cols``.')
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="dd1")
+ msg = r"At least one variable/column is set as covariate \(``x_cols``\) and cluster variable in " "``cluster_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1', 'xx2'], cluster_cols='xx2')
- msg = (r'At least one variable/column is set as instrumental variable \(``z_cols``\) and cluster variable in '
- '``cluster_cols``.')
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1", "xx2"], cluster_cols="xx2")
+ msg = (
+ r"At least one variable/column is set as instrumental variable \(``z_cols``\) and cluster variable in "
+ "``cluster_cols``."
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1'], z_cols=['xx2'], cluster_cols='xx2')
- msg = 'xx2 cannot be set as time variable ``t_col`` and cluster variable in ``cluster_cols``.'
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], z_cols=["xx2"], cluster_cols="xx2")
+ msg = "xx2 cannot be set as time variable ``t_col`` and cluster variable in ``cluster_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1'], t_col='xx2', cluster_cols='xx2')
- msg = 'xx2 cannot be set as score or selection variable ``s_col`` and cluster variable in ``cluster_cols``.'
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], t_col="xx2", cluster_cols="xx2")
+ msg = "xx2 cannot be set as score or selection variable ``s_col`` and cluster variable in ``cluster_cols``."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(df, y_col='yy', d_cols=['dd1'], x_cols=['xx1'], s_col='xx2', cluster_cols='xx2')
+ _ = DoubleMLClusterData(df, y_col="yy", d_cols=["dd1"], x_cols=["xx1"], s_col="xx2", cluster_cols="xx2")
@pytest.mark.ci
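> Editor's note: the `test_use_other_treat_as_covariate` hunk above asserts the `set_x_d` mechanics that the disjointness checks protect. A minimal sketch, assuming only the public API shown there — with `use_other_treat_as_covariate=True`, the columns of the non-active treatments are appended to the covariates of the active one:

```python
import numpy as np
import pandas as pd

from doubleml import DoubleMLData

np.random.seed(3141)
df = pd.DataFrame(np.random.normal(size=(100, 5)), columns=["X1", "X2", "y", "d1", "d2"])
dml_data = DoubleMLData(df, "y", ["d1", "d2"], ["X1", "X2"], use_other_treat_as_covariate=True)

dml_data.set_x_d("d1")   # make d1 the active treatment
print(dml_data.x.shape)  # (100, 3): X1, X2 plus the other treatment d2
print(dml_data.d.shape)  # (100,): the active treatment column d1
```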
@@ -653,38 +629,39 @@ def test_duplicates():
dml_data = make_plr_CCDDHNR2018(n_obs=100)
dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
- msg = r'Invalid treatment variable\(s\) d_cols: Contains duplicate values.'
+ msg = r"Invalid treatment variable\(s\) d_cols: Contains duplicate values."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(dml_data.data, y_col='y', d_cols=['d', 'd', 'X1'], x_cols=['X3', 'X2'])
+ _ = DoubleMLData(dml_data.data, y_col="y", d_cols=["d", "d", "X1"], x_cols=["X3", "X2"])
with pytest.raises(ValueError, match=msg):
- dml_data.d_cols = ['d', 'd', 'X1']
+ dml_data.d_cols = ["d", "d", "X1"]
- msg = 'Invalid covariates x_cols: Contains duplicate values.'
+ msg = "Invalid covariates x_cols: Contains duplicate values."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(dml_data.data, y_col='y', d_cols=['d'], x_cols=['X3', 'X2', 'X3'])
+ _ = DoubleMLData(dml_data.data, y_col="y", d_cols=["d"], x_cols=["X3", "X2", "X3"])
with pytest.raises(ValueError, match=msg):
- dml_data.x_cols = ['X3', 'X2', 'X3']
+ dml_data.x_cols = ["X3", "X2", "X3"]
- msg = r'Invalid instrumental variable\(s\) z_cols: Contains duplicate values.'
+ msg = r"Invalid instrumental variable\(s\) z_cols: Contains duplicate values."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(dml_data.data, y_col='y', d_cols=['d'], x_cols=['X3', 'X2'],
- z_cols=['X15', 'X12', 'X12', 'X15'])
+ _ = DoubleMLData(dml_data.data, y_col="y", d_cols=["d"], x_cols=["X3", "X2"], z_cols=["X15", "X12", "X12", "X15"])
with pytest.raises(ValueError, match=msg):
- dml_data.z_cols = ['X15', 'X12', 'X12', 'X15']
+ dml_data.z_cols = ["X15", "X12", "X12", "X15"]
- msg = r'Invalid cluster variable\(s\) cluster_cols: Contains duplicate values.'
+ msg = r"Invalid cluster variable\(s\) cluster_cols: Contains duplicate values."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(dml_cluster_data.data, y_col='y', d_cols=['d'], cluster_cols=['X3', 'X2', 'X3'])
+ _ = DoubleMLClusterData(dml_cluster_data.data, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2", "X3"])
with pytest.raises(ValueError, match=msg):
- dml_cluster_data.cluster_cols = ['X3', 'X2', 'X3']
+ dml_cluster_data.cluster_cols = ["X3", "X2", "X3"]
- msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
+ msg = "Invalid pd.DataFrame: Contains duplicate column names."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(pd.DataFrame(np.zeros((100, 5)), columns=['y', 'd', 'X3', 'X2', 'y']),
- y_col='y', d_cols=['d'], x_cols=['X3', 'X2'])
+ _ = DoubleMLData(
+ pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), y_col="y", d_cols=["d"], x_cols=["X3", "X2"]
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLClusterData(pd.DataFrame(np.zeros((100, 5)), columns=['y', 'd', 'X3', 'X2', 'y']),
- y_col='y', d_cols=['d'], cluster_cols=['X2'])
+ _ = DoubleMLClusterData(
+ pd.DataFrame(np.zeros((100, 5)), columns=["y", "d", "X3", "X2", "y"]), y_col="y", d_cols=["d"], cluster_cols=["X2"]
+ )
@pytest.mark.ci
@@ -693,62 +670,49 @@ def test_dml_datatype():
# msg = ('data must be of pd.DataFrame type. '
# f'{str(data_array)} of type {str(type(data_array))} was passed.')
with pytest.raises(TypeError):
- _ = DoubleMLData(data_array, y_col='y', d_cols=['d'], x_cols=['X3', 'X2'])
+ _ = DoubleMLData(data_array, y_col="y", d_cols=["d"], x_cols=["X3", "X2"])
with pytest.raises(TypeError):
- _ = DoubleMLClusterData(data_array, y_col='y', d_cols=['d'], cluster_cols=['X3', 'X2'])
+ _ = DoubleMLClusterData(data_array, y_col="y", d_cols=["d"], cluster_cols=["X3", "X2"])
@pytest.mark.ci
def test_dml_data_w_missings(generate_data_irm_w_missings):
(x, y, d) = generate_data_irm_w_missings
- dml_data = DoubleMLData.from_arrays(x, y, d,
- force_all_x_finite=False)
+ dml_data = DoubleMLData.from_arrays(x, y, d, force_all_x_finite=False)
- _ = DoubleMLData.from_arrays(x, y, d,
- force_all_x_finite='allow-nan')
+ _ = DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
msg = r"Input contains NaN."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(x, y, d,
- force_all_x_finite=True)
+ _ = DoubleMLData.from_arrays(x, y, d, force_all_x_finite=True)
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(x, x[:, 0], d,
- force_all_x_finite=False)
+ _ = DoubleMLData.from_arrays(x, x[:, 0], d, force_all_x_finite=False)
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(x, y, x[:, 0],
- force_all_x_finite=False)
+ _ = DoubleMLData.from_arrays(x, y, x[:, 0], force_all_x_finite=False)
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(x, y, d, x[:, 0],
- force_all_x_finite=False)
+ _ = DoubleMLData.from_arrays(x, y, d, x[:, 0], force_all_x_finite=False)
msg = r"Input contains infinity or a value too large for dtype\('float64'\)."
xx = np.copy(x)
xx[0, 0] = np.inf
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(xx, y, d,
- force_all_x_finite='allow-nan')
+ _ = DoubleMLData.from_arrays(xx, y, d, force_all_x_finite="allow-nan")
msg = "Invalid force_all_x_finite. force_all_x_finite must be True, False or 'allow-nan'."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLData.from_arrays(xx, y, d,
- force_all_x_finite=1)
+ _ = DoubleMLData.from_arrays(xx, y, d, force_all_x_finite=1)
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLData(dml_data.data,
- y_col='y', d_cols='d',
- force_all_x_finite=1)
+ _ = DoubleMLData(dml_data.data, y_col="y", d_cols="d", force_all_x_finite=1)
msg = "Invalid force_all_x_finite allownan. force_all_x_finite must be True, False or 'allow-nan'."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData.from_arrays(xx, y, d,
- force_all_x_finite='allownan')
+ _ = DoubleMLData.from_arrays(xx, y, d, force_all_x_finite="allownan")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLData(dml_data.data,
- y_col='y', d_cols='d',
- force_all_x_finite='allownan')
+ _ = DoubleMLData(dml_data.data, y_col="y", d_cols="d", force_all_x_finite="allownan")
msg = r"Input contains NaN."
with pytest.raises(ValueError, match=msg):
@@ -757,5 +721,5 @@ def test_dml_data_w_missings(generate_data_irm_w_missings):
assert dml_data.force_all_x_finite is True
dml_data.force_all_x_finite = False
assert dml_data.force_all_x_finite is False
- dml_data.force_all_x_finite = 'allow-nan'
- assert dml_data.force_all_x_finite == 'allow-nan'
+ dml_data.force_all_x_finite = "allow-nan"
+ assert dml_data.force_all_x_finite == "allow-nan"
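> Editor's note: the tail of this file exercises `force_all_x_finite`. A self-contained sketch of the accepted values, assuming covariates with one missing entry: `True` (the default) rejects NaNs in `x`, while `False` and `'allow-nan'` tolerate them; NaNs in `y` or `d` always raise, as the tests above check.

```python
import numpy as np

from doubleml import DoubleMLData

np.random.seed(3141)
x = np.random.normal(size=(100, 4))
x[0, 0] = np.nan  # one missing covariate value
y = np.random.normal(size=100)
d = np.random.binomial(1, 0.5, size=100).astype(float)

# tolerated: NaN in x with 'allow-nan' (or False)
dml_data = DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
print(dml_data.force_all_x_finite)  # 'allow-nan'

# rejected: the default force_all_x_finite=True raises on the NaN
try:
    DoubleMLData.from_arrays(x, y, d)
except ValueError as err:
    print(err)  # Input contains NaN.
```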
diff --git a/doubleml/tests/test_evaluate_learner.py b/doubleml/tests/test_evaluate_learner.py
index 4b3056b89..dbad9b620 100644
--- a/doubleml/tests/test_evaluate_learner.py
+++ b/doubleml/tests/test_evaluate_learner.py
@@ -1,75 +1,76 @@
-import pytest
import numpy as np
-import doubleml as dml
-from doubleml.datasets import make_irm_data
+import pytest
from sklearn.base import clone
-
-from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+import doubleml as dml
+from doubleml.datasets import make_irm_data
from doubleml.utils._estimation import _logloss
-
np.random.seed(3141)
-data = make_irm_data(theta=0.5, n_obs=200, dim_x=5, return_type='DataFrame')
-obj_dml_data = dml.DoubleMLData(data, 'y', 'd')
+data = make_irm_data(theta=0.5, n_obs=200, dim_x=5, return_type="DataFrame")
+obj_dml_data = dml.DoubleMLData(data, "y", "d")
-@pytest.fixture(scope='module',
- params=[[LinearRegression(),
- LogisticRegression(solver='lbfgs', max_iter=250)],
- [RandomForestRegressor(max_depth=2, n_estimators=10),
- RandomForestClassifier(max_depth=2, n_estimators=10)]])
+@pytest.fixture(
+ scope="module",
+ params=[
+ [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+ [RandomForestRegressor(max_depth=2, n_estimators=10), RandomForestClassifier(max_depth=2, n_estimators=10)],
+ ],
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.01, 0.05])
+@pytest.fixture(scope="module", params=[0.01, 0.05])
def trimming_threshold(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_irm_eval_learner_fixture(learner, trimming_threshold, n_rep):
# Set machine learning methods for m & g
ml_g = clone(learner[0])
ml_m = clone(learner[1])
np.random.seed(3141)
- dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
- ml_g, ml_m,
- n_folds=2,
- n_rep=n_rep,
- trimming_threshold=trimming_threshold)
+ dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m, n_folds=2, n_rep=n_rep, trimming_threshold=trimming_threshold)
dml_irm_obj.fit()
- res_manual = dml_irm_obj.evaluate_learners(learners=['ml_g0', 'ml_g1'])
- res_manual['ml_m'] = dml_irm_obj.evaluate_learners(learners=['ml_m'], metric=_logloss)['ml_m']
+ res_manual = dml_irm_obj.evaluate_learners(learners=["ml_g0", "ml_g1"])
+ res_manual["ml_m"] = dml_irm_obj.evaluate_learners(learners=["ml_m"], metric=_logloss)["ml_m"]
- res_dict = {'nuisance_loss': dml_irm_obj.nuisance_loss,
- 'nuisance_loss_manual': res_manual
- }
+ res_dict = {"nuisance_loss": dml_irm_obj.nuisance_loss, "nuisance_loss_manual": res_manual}
return res_dict
@pytest.mark.ci
def test_dml_irm_eval_learner(dml_irm_eval_learner_fixture, n_rep):
- assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'].shape == (n_rep, 1)
- assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'].shape == (n_rep, 1)
- assert dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'].shape == (n_rep, 1)
-
- assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g0'],
- dml_irm_eval_learner_fixture['nuisance_loss']['ml_g0'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_g1'],
- dml_irm_eval_learner_fixture['nuisance_loss']['ml_g1'],
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm_eval_learner_fixture['nuisance_loss_manual']['ml_m'],
- dml_irm_eval_learner_fixture['nuisance_loss']['ml_m'],
- rtol=1e-9, atol=1e-4)
+ assert dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_g0"].shape == (n_rep, 1)
+ assert dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_g1"].shape == (n_rep, 1)
+ assert dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_m"].shape == (n_rep, 1)
+
+ assert np.allclose(
+ dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_g0"],
+ dml_irm_eval_learner_fixture["nuisance_loss"]["ml_g0"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
+ assert np.allclose(
+ dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_g1"],
+ dml_irm_eval_learner_fixture["nuisance_loss"]["ml_g1"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
+ assert np.allclose(
+ dml_irm_eval_learner_fixture["nuisance_loss_manual"]["ml_m"],
+ dml_irm_eval_learner_fixture["nuisance_loss"]["ml_m"],
+ rtol=1e-9,
+ atol=1e-4,
+ )
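> Editor's note: a short sketch of `evaluate_learners()` as used in the fixture above. The default metric is RMSE for regression-type nuisances; the `(y_true, y_pred) -> float` signature of a custom metric is an assumption inferred from `_logloss` being swapped in above, and `mean_abs_error` below is a hypothetical helper, not part of the package.

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

import doubleml as dml
from doubleml.datasets import make_irm_data

np.random.seed(3141)
data = make_irm_data(theta=0.5, n_obs=200, dim_x=5, return_type="DataFrame")
obj_dml_data = dml.DoubleMLData(data, "y", "d")

dml_irm = dml.DoubleMLIRM(
    obj_dml_data, LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250), n_folds=2
)
dml_irm.fit()


def mean_abs_error(y_true, y_pred):
    # hypothetical custom metric; signature (y_true, y_pred) -> float assumed
    return float(np.mean(np.abs(y_true - y_pred)))


# returns one loss value per repetition and treatment, shape (n_rep, n_coefs)
print(dml_irm.evaluate_learners(learners=["ml_g0", "ml_g1"], metric=mean_abs_error))
```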
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index cacd1edfa..26c80bb3e 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -1,18 +1,36 @@
-import pytest
-import pandas as pd
-import numpy as np
import copy
-from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, \
- DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, \
- DoubleMLDIDCS, DoubleMLBLP
-from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \
- make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.linear_model import Lasso, LogisticRegression
-from ._utils import DummyDataClass
+from doubleml import (
+ DoubleMLBLP,
+ DoubleMLClusterData,
+ DoubleMLCVAR,
+ DoubleMLData,
+ DoubleMLDID,
+ DoubleMLDIDCS,
+ DoubleMLIIVM,
+ DoubleMLIRM,
+ DoubleMLLPQ,
+ DoubleMLPLIV,
+ DoubleMLPLR,
+ DoubleMLPQ,
+ DoubleMLQTE,
+)
+from doubleml.datasets import (
+ make_did_SZ2020,
+ make_iivm_data,
+ make_irm_data,
+ make_pliv_CHS2015,
+ make_pliv_multiway_cluster_CKMS2021,
+ make_plr_CCDDHNR2018,
+)
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.base import BaseEstimator
+from ._utils import DummyDataClass
np.random.seed(3141)
n = 100
@@ -22,7 +40,7 @@
ml_g = Lasso()
ml_r = Lasso()
dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m)
-dml_plr_iv_type = DoubleMLPLR(dml_data, ml_l, ml_m, ml_g, score='IV-type')
+dml_plr_iv_type = DoubleMLPLR(dml_data, ml_l, ml_m, ml_g, score="IV-type")
dml_data_pliv = make_pliv_CHS2015(n_obs=n, dim_z=1)
dml_pliv = DoubleMLPLIV(dml_data_pliv, ml_l, ml_m, ml_r)
@@ -41,11 +59,11 @@
@pytest.mark.ci
def test_doubleml_exception_data():
- msg = 'The data must be of DoubleMLData or DoubleMLClusterData type.'
+ msg = "The data must be of DoubleMLData or DoubleMLClusterData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(pd.DataFrame(), ml_l, ml_m)
- msg = 'The data must be of DoubleMLData type.'
+ msg = "The data must be of DoubleMLData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_l, ml_m)
with pytest.raises(TypeError, match=msg):
@@ -62,207 +80,217 @@ def test_doubleml_exception_data():
_ = DoubleMLCVAR(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_m, treatment=1)
with pytest.raises(TypeError, match=msg):
_ = DoubleMLQTE(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_m)
- msg = 'For repeated outcomes the data must be of DoubleMLData type.'
+ msg = "For repeated outcomes the data must be of DoubleMLData type."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLDID(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_m)
- msg = 'For repeated cross sections the data must be of DoubleMLData type. '
+ msg = "For repeated cross sections the data must be of DoubleMLData type. "
with pytest.raises(TypeError, match=msg):
_ = DoubleMLDIDCS(DummyDataClass(pd.DataFrame(np.zeros((100, 10)))), ml_g, ml_m)
# PLR with IV
- msg = (r'Incompatible data. Z1 have been set as instrumental variable\(s\). '
- 'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.')
+ msg = (
+ r"Incompatible data. Z1 have been set as instrumental variable\(s\). "
+ "To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data_pliv, ml_l, ml_m)
# PLIV without IV
- msg = ('Incompatible data. '
- 'At least one variable must be set as instrumental variable. '
- r'To fit a partially linear regression model without instrumental variable\(s\) '
- 'use DoubleMLPLR instead of DoubleMLPLIV.')
+ msg = (
+ "Incompatible data. "
+ "At least one variable must be set as instrumental variable. "
+ r"To fit a partially linear regression model without instrumental variable\(s\) "
+ "use DoubleMLPLR instead of DoubleMLPLIV."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLIV(dml_data, Lasso(), Lasso(), Lasso())
# IRM with IV
- msg = (r'Incompatible data. z have been set as instrumental variable\(s\). '
- 'To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM.')
+ msg = (
+ r"Incompatible data. z have been set as instrumental variable\(s\). "
+ "To fit an interactive IV regression model use DoubleMLIIVM instead of DoubleMLIRM."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLIRM(dml_data_iivm, Lasso(), LogisticRegression())
- msg = ('Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for IRM
- _ = DoubleMLIRM(DoubleMLData(df_irm, 'y', 'd'),
- Lasso(), LogisticRegression())
+ _ = DoubleMLIRM(DoubleMLData(df_irm, "y", "d"), Lasso(), LogisticRegression())
with pytest.raises(ValueError, match=msg):
# multiple D for IRM
- _ = DoubleMLIRM(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- Lasso(), LogisticRegression())
+ _ = DoubleMLIRM(DoubleMLData(df_irm, "y", ["d", "X1"]), Lasso(), LogisticRegression())
- msg = ('Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_iivm = dml_data_iivm.data.copy()
- df_iivm['d'] = df_iivm['d'] * 2
+ df_iivm["d"] = df_iivm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for IIVM
- _ = DoubleMLIIVM(DoubleMLData(df_iivm, 'y', 'd', z_cols='z'),
- Lasso(), LogisticRegression(), LogisticRegression())
+ _ = DoubleMLIIVM(DoubleMLData(df_iivm, "y", "d", z_cols="z"), Lasso(), LogisticRegression(), LogisticRegression())
df_iivm = dml_data_iivm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for IIVM
- _ = DoubleMLIIVM(DoubleMLData(df_iivm, 'y', ['d', 'X1'], z_cols='z'),
- Lasso(), LogisticRegression(), LogisticRegression())
+ _ = DoubleMLIIVM(
+ DoubleMLData(df_iivm, "y", ["d", "X1"], z_cols="z"), Lasso(), LogisticRegression(), LogisticRegression()
+ )
- msg = ('Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as instrumental variable.')
+ msg = (
+ "Incompatible data. To fit an IIVM model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as instrumental variable."
+ )
with pytest.raises(ValueError, match=msg):
# IIVM without IV
- _ = DoubleMLIIVM(dml_data_irm,
- Lasso(), LogisticRegression(), LogisticRegression())
+ _ = DoubleMLIIVM(dml_data_irm, Lasso(), LogisticRegression(), LogisticRegression())
df_iivm = dml_data_iivm.data.copy()
- df_iivm['z'] = df_iivm['z'] * 2
+ df_iivm["z"] = df_iivm["z"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary Z for IIVM
- _ = DoubleMLIIVM(DoubleMLData(df_iivm, 'y', 'd', z_cols='z'),
- Lasso(), LogisticRegression(), LogisticRegression())
+ _ = DoubleMLIIVM(DoubleMLData(df_iivm, "y", "d", z_cols="z"), Lasso(), LogisticRegression(), LogisticRegression())
df_iivm = dml_data_iivm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple Z for IIVM
- _ = DoubleMLIIVM(DoubleMLData(df_iivm, 'y', 'd', z_cols=['z', 'X1']),
- Lasso(), LogisticRegression(), LogisticRegression())
+ _ = DoubleMLIIVM(
+ DoubleMLData(df_iivm, "y", "d", z_cols=["z", "X1"]), Lasso(), LogisticRegression(), LogisticRegression()
+ )
# PQ with IV
- msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+ msg = r"Incompatible data. z have been set as instrumental variable\(s\)."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1)
- msg = ('Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for PQ
- _ = DoubleMLPQ(DoubleMLData(df_irm, 'y', 'd'),
- LogisticRegression(), LogisticRegression(), treatment=1)
+ _ = DoubleMLPQ(DoubleMLData(df_irm, "y", "d"), LogisticRegression(), LogisticRegression(), treatment=1)
df_irm = dml_data_irm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for PQ
- _ = DoubleMLPQ(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- LogisticRegression(), LogisticRegression(), treatment=1)
+ _ = DoubleMLPQ(DoubleMLData(df_irm, "y", ["d", "X1"]), LogisticRegression(), LogisticRegression(), treatment=1)
# LPQ with non-binary treatment
- msg = ('Incompatible data. To fit an LPQ model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an LPQ model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_iivm = dml_data_iivm.data.copy()
- df_iivm['d'] = df_iivm['d'] * 2
+ df_iivm["d"] = df_iivm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for LPQ
- _ = DoubleMLLPQ(DoubleMLData(df_iivm, 'y', 'd', 'z'),
- LogisticRegression(), LogisticRegression(), treatment=1)
+ _ = DoubleMLLPQ(DoubleMLData(df_iivm, "y", "d", "z"), LogisticRegression(), LogisticRegression(), treatment=1)
df_iivm = dml_data_iivm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for LPQ
- _ = DoubleMLLPQ(DoubleMLData(df_iivm, 'y', ['d', 'X1'], 'z'),
- LogisticRegression(), LogisticRegression(), treatment=1)
- msg = ('Incompatible data. To fit an LPQ model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as instrumental variable.')
+ _ = DoubleMLLPQ(DoubleMLData(df_iivm, "y", ["d", "X1"], "z"), LogisticRegression(), LogisticRegression(), treatment=1)
+ msg = (
+ "Incompatible data. To fit an LPQ model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as instrumental variable."
+ )
df_iivm = dml_data_iivm.data.copy()
- df_iivm['z'] = df_iivm['z'] * 2
+ df_iivm["z"] = df_iivm["z"] * 2
with pytest.raises(ValueError, match=msg):
# no instrument Z for LPQ
- _ = DoubleMLLPQ(DoubleMLData(df_iivm, 'y', 'd', x_cols=['z']),
- LogisticRegression(), LogisticRegression(), treatment=1)
+ _ = DoubleMLLPQ(DoubleMLData(df_iivm, "y", "d", x_cols=["z"]), LogisticRegression(), LogisticRegression(), treatment=1)
with pytest.raises(ValueError, match=msg):
# non-binary Z for LPQ
- _ = DoubleMLLPQ(DoubleMLData(df_iivm, 'y', 'd', z_cols=['z']),
- LogisticRegression(), LogisticRegression(), treatment=1)
+ _ = DoubleMLLPQ(DoubleMLData(df_iivm, "y", "d", z_cols=["z"]), LogisticRegression(), LogisticRegression(), treatment=1)
# CVAR with IV
- msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+ msg = r"Incompatible data. z have been set as instrumental variable\(s\)."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLCVAR(dml_data_iivm, Lasso(), LogisticRegression(), treatment=1)
- msg = ('Incompatible data. To fit an CVaR model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an CVaR model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for CVAR
- _ = DoubleMLCVAR(DoubleMLData(df_irm, 'y', 'd'),
- Lasso(), LogisticRegression(), treatment=1)
+ _ = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), Lasso(), LogisticRegression(), treatment=1)
df_irm = dml_data_irm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for CVAR
- _ = DoubleMLCVAR(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- Lasso(), LogisticRegression(), treatment=1)
+ _ = DoubleMLCVAR(DoubleMLData(df_irm, "y", ["d", "X1"]), Lasso(), LogisticRegression(), treatment=1)
# QTE
- msg = ('Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an PQ model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for QTE
- _ = DoubleMLQTE(DoubleMLData(df_irm, 'y', 'd'),
- LogisticRegression(), LogisticRegression())
+ _ = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), LogisticRegression(), LogisticRegression())
df_irm = dml_data_irm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for QTE
- _ = DoubleMLQTE(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- LogisticRegression(), LogisticRegression())
+ _ = DoubleMLQTE(DoubleMLData(df_irm, "y", ["d", "X1"]), LogisticRegression(), LogisticRegression())
# DID with IV
- msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+ msg = r"Incompatible data. z have been set as instrumental variable\(s\)."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLDID(dml_data_iivm, Lasso(), LogisticRegression())
- msg = ('Incompatible data. To fit an DID model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an DID model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_irm = dml_data_irm.data.copy()
- df_irm['d'] = df_irm['d'] * 2
+ df_irm["d"] = df_irm["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for DID
- _ = DoubleMLDID(DoubleMLData(df_irm, 'y', 'd'),
- Lasso(), LogisticRegression())
+ _ = DoubleMLDID(DoubleMLData(df_irm, "y", "d"), Lasso(), LogisticRegression())
df_irm = dml_data_irm.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for DID
- _ = DoubleMLDID(DoubleMLData(df_irm, 'y', ['d', 'X1']),
- Lasso(), LogisticRegression())
+ _ = DoubleMLDID(DoubleMLData(df_irm, "y", ["d", "X1"]), Lasso(), LogisticRegression())
# DIDCS with IV
- msg = r'Incompatible data. z have been set as instrumental variable\(s\).'
+ msg = r"Incompatible data. z have been set as instrumental variable\(s\)."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLDIDCS(dml_data_iivm, Lasso(), LogisticRegression())
# DIDCS treatment exceptions
- msg = ('Incompatible data. To fit an DIDCS model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ msg = (
+ "Incompatible data. To fit an DIDCS model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
df_did_cs = dml_data_did_cs.data.copy()
- df_did_cs['d'] = df_did_cs['d'] * 2
+ df_did_cs["d"] = df_did_cs["d"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary D for DIDCS
- _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col='y', d_cols='d', t_col='t'),
- Lasso(), LogisticRegression())
+ _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression())
df_did_cs = dml_data_did_cs.data.copy()
with pytest.raises(ValueError, match=msg):
# multiple D for DIDCS
- _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col='y', d_cols=['d', 'Z1'], t_col='t'),
- Lasso(), LogisticRegression())
+ _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols=["d", "Z1"], t_col="t"), Lasso(), LogisticRegression())
# DIDCS time exceptions
- msg = ('Incompatible data. To fit an DIDCS model with DML exactly one binary variable with values 0 and 1 '
- 'needs to be specified as time variable.')
+ msg = (
+ "Incompatible data. To fit an DIDCS model with DML exactly one binary variable with values 0 and 1 "
+ "needs to be specified as time variable."
+ )
df_did_cs = dml_data_did_cs.data.copy()
- df_did_cs['t'] = df_did_cs['t'] * 2
+ df_did_cs["t"] = df_did_cs["t"] * 2
with pytest.raises(ValueError, match=msg):
# non-binary t for DIDCS
- _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col='y', d_cols='d', t_col='t'),
- Lasso(), LogisticRegression())
+ _ = DoubleMLDIDCS(DoubleMLData(df_did_cs, y_col="y", d_cols="d", t_col="t"), Lasso(), LogisticRegression())
@pytest.mark.ci
def test_doubleml_exception_framework():
- msg = r'Apply fit\(\) before sensitivity_analysis\(\).'
+ msg = r"Apply fit\(\) before sensitivity_analysis\(\)."
with pytest.raises(ValueError, match=msg):
dml_obj = DoubleMLPLR(dml_data, ml_l, ml_m)
dml_obj.sensitivity_analysis()
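> Editor's note: throughout this file the expected messages are passed to `pytest.raises(..., match=...)`, which interprets them with `re.search`; that is why literal parentheses in the strings above are escaped as `\(` and `\)`. A tiny self-contained illustration:

```python
import pytest


def fit_first():
    raise ValueError("Apply fit() before sensitivity_analysis().")


def test_match_is_a_regex():
    # match= is a regular expression, so literal parentheses must be escaped
    with pytest.raises(ValueError, match=r"Apply fit\(\) before sensitivity_analysis\(\)\."):
        fit_first()
```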
@@ -271,173 +299,207 @@ def test_doubleml_exception_framework():
@pytest.mark.ci
def test_doubleml_exception_scores():
# PLR
- msg = 'Invalid score IV. Valid score IV-type or partialling out.'
+ msg = "Invalid score IV. Valid score IV-type or partialling out."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPLR(dml_data, ml_l, ml_m, score='IV')
- msg = 'score should be either a string or a callable. 0 was passed.'
+ _ = DoubleMLPLR(dml_data, ml_l, ml_m, score="IV")
+ msg = "score should be either a string or a callable. 0 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, score=0)
# IRM
- msg = 'Invalid score IV. Valid score ATE or ATTE.'
+ msg = "Invalid score IV. Valid score ATE or ATTE."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score='IV')
- msg = 'score should be either a string or a callable. 0 was passed.'
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score="IV")
+ msg = "score should be either a string or a callable. 0 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score=0)
# IIVM
- msg = 'Invalid score ATE. Valid score LATE.'
+ msg = "Invalid score ATE. Valid score LATE."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), score='ATE')
- msg = 'score should be either a string or a callable. 0 was passed.'
+ _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), score="ATE")
+ msg = "score should be either a string or a callable. 0 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), score=0)
# PLIV
- msg = 'Invalid score IV. Valid score partialling out.'
+ msg = "Invalid score IV. Valid score partialling out."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), score='IV')
- msg = 'score should be either a string or a callable. 0 was passed.'
+ _ = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), score="IV")
+ msg = "score should be either a string or a callable. 0 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), score=0)
# PQ
- msg = 'Invalid score IV. Valid score PQ.'
+ msg = "Invalid score IV. Valid score PQ."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score=2)
# LPQ
- msg = 'Invalid score IV. Valid score LPQ.'
+ msg = "Invalid score IV. Valid score LPQ."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, score=2)
# CVaR
- msg = 'Invalid score IV. Valid score CVaR.'
+ msg = "Invalid score IV. Valid score CVaR."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, score=2)
# QTE
- msg = 'Invalid score IV. Valid score PQ or LPQ or CVaR.'
+ msg = "Invalid score IV. Valid score PQ or LPQ or CVaR."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score=2)
# DID
- msg = 'Invalid score IV. Valid score observational or experimental.'
+ msg = "Invalid score IV. Valid score observational or experimental."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), score=2)
# DIDCS
- msg = 'Invalid score IV. Valid score observational or experimental.'
+ msg = "Invalid score IV. Valid score observational or experimental."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score='IV')
- msg = 'score should be a string. 2 was passed.'
+ _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score="IV")
+ msg = "score should be a string. 2 was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score=2)
@pytest.mark.ci
def test_doubleml_exception_trimming_rule():
- msg = 'Invalid trimming_rule discard. Valid trimming_rule truncate.'
+ msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule='discard')
+ _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule='discard')
+ _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule='discard')
+ _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="discard")
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule='discard')
+ _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="discard")
# check the trimming_threshold exceptions
msg = "trimming_threshold has to be a float. Object of type passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLCVAR(dml_data_irm, Lasso(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
- with pytest.raises(TypeError, match=msg):
- _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold="0.1")
-
- msg = 'Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5.'
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLCVAR(dml_data_irm, Lasso(), LogisticRegression(), treatment=1,
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(),
- trimming_rule='truncate', trimming_threshold=0.6)
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ trimming_rule="truncate",
+ trimming_threshold="0.1",
+ )
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLPQ(
+ dml_data_irm,
+ LogisticRegression(),
+ LogisticRegression(),
+ treatment=1,
+ trimming_rule="truncate",
+ trimming_threshold="0.1",
+ )
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLLPQ(
+ dml_data_iivm,
+ LogisticRegression(),
+ LogisticRegression(),
+ treatment=1,
+ trimming_rule="truncate",
+ trimming_threshold="0.1",
+ )
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLCVAR(
+ dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold="0.1"
+ )
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLQTE(
+ dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1"
+ )
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
+ with pytest.raises(TypeError, match=msg):
+ _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
+
+ msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ trimming_rule="truncate",
+ trimming_threshold=0.6,
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLPQ(
+ dml_data_irm,
+ LogisticRegression(),
+ LogisticRegression(),
+ treatment=1,
+ trimming_rule="truncate",
+ trimming_threshold=0.6,
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLLPQ(
+ dml_data_iivm,
+ LogisticRegression(),
+ LogisticRegression(),
+ treatment=1,
+ trimming_rule="truncate",
+ trimming_threshold=0.6,
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLCVAR(
+ dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold=0.6
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLQTE(
+ dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
@pytest.mark.ci
def test_doubleml_exception_weights():
-
msg = "weights must be a numpy array or dictionary. weights of type was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=1)
msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={'d': [1, 2, 3]})
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={"d": [1, 2, 3]})
msg = "weights must be a numpy array for ATTE score. weights of type was passed."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- score='ATTE', weights={'weights': np.ones_like(dml_data_irm.d)})
+ _ = DoubleMLIRM(
+ dml_data_irm, Lasso(), LogisticRegression(), score="ATTE", weights={"weights": np.ones_like(dml_data_irm.d)}
+ )
# shape checks
msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
@@ -449,46 +511,85 @@ def test_doubleml_exception_weights():
msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.ones(1), 'weights_bar': np.ones(1)})
+ _ = DoubleMLIRM(
+ dml_data_irm, Lasso(), LogisticRegression(), weights={"weights": np.ones(1), "weights_bar": np.ones(1)}
+ )
msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.ones((n, 2)), 'weights_bar': np.ones((n, 2))})
+ _ = DoubleMLIRM(
+ dml_data_irm, Lasso(), LogisticRegression(), weights={"weights": np.ones((n, 2)), "weights_bar": np.ones((n, 2))}
+ )
msg = rf"weights_bar must have shape \({n}, 1\). weights_bar of shape \({n}, 2\) was passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.ones(n), 'weights_bar': np.ones((n, 2))})
+ _ = DoubleMLIRM(
+ dml_data_irm, Lasso(), LogisticRegression(), weights={"weights": np.ones(n), "weights_bar": np.ones((n, 2))}
+ )
# value checks
msg = "All weights values must be greater or equal 0."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights=-1*np.ones(n,))
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': -1*np.ones(n,), 'weights_bar': np.ones((n, 1))})
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.ones(n,), 'weights_bar': -1*np.ones((n, 1))})
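+ # all-negative weights must raise, whether passed as a plain array or via the weights / weights_bar dict entries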
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ weights=-1
+ * np.ones(
+ n,
+ ),
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ weights={
+ "weights": -1
+ * np.ones(
+ n,
+ ),
+ "weights_bar": np.ones((n, 1)),
+ },
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ weights={
+ "weights": np.ones(
+ n,
+ ),
+ "weights_bar": -1 * np.ones((n, 1)),
+ },
+ )
msg = "At least one weight must be non-zero."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights=np.zeros((dml_data_irm.d.shape[0], )))
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.zeros((dml_data_irm.d.shape[0],)))
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.zeros((dml_data_irm.d.shape[0], )),
- 'weights_bar': np.ones((dml_data_irm.d.shape[0], 1))})
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ weights={"weights": np.zeros((dml_data_irm.d.shape[0],)), "weights_bar": np.ones((dml_data_irm.d.shape[0], 1))},
+ )
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- weights={'weights': np.ones((dml_data_irm.d.shape[0], )),
- 'weights_bar': np.zeros((dml_data_irm.d.shape[0], 1))})
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ weights={"weights": np.ones((dml_data_irm.d.shape[0],)), "weights_bar": np.zeros((dml_data_irm.d.shape[0], 1))},
+ )
msg = "weights must be binary for ATTE score."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- score='ATTE', weights=np.random.choice([0, 0.2], dml_data_irm.d.shape[0]))
+ _ = DoubleMLIRM(
+ dml_data_irm,
+ Lasso(),
+ LogisticRegression(),
+ score="ATTE",
+ weights=np.random.choice([0, 0.2], dml_data_irm.d.shape[0]),
+ )
@pytest.mark.ci
@@ -503,13 +604,13 @@ def test_doubleml_exception_quantiles():
msg = "Quantile has be between 0 or 1. Quantile 1.0 passed."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPQ(dml_data_irm, ml_g, ml_m, treatment=1, quantile=1.)
+ _ = DoubleMLPQ(dml_data_irm, ml_g, ml_m, treatment=1, quantile=1.0)
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLLPQ(dml_data_iivm, ml_g, ml_m, treatment=1, quantile=1.)
+ _ = DoubleMLLPQ(dml_data_iivm, ml_g, ml_m, treatment=1, quantile=1.0)
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLCVAR(dml_data_irm, ml_g, ml_m, treatment=1, quantile=1.)
+ _ = DoubleMLCVAR(dml_data_irm, ml_g, ml_m, treatment=1, quantile=1.0)
- msg = r'Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed.'
+ msg = r"Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLQTE(dml_data_irm, ml_g, ml_m, quantiles=[0.2, 2])
@@ -570,32 +671,45 @@ def test_doubleml_exception_ipw_normalization():
@pytest.mark.ci
def test_doubleml_exception_subgroups():
- msg = 'Invalid subgroups True. subgroups must be of type dictionary.'
+ msg = "Invalid subgroups True. subgroups must be of type dictionary."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups=True)
+ _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), subgroups=True)
msg = "Invalid subgroups {'abs': True}. subgroups must be a dictionary with keys always_takers and never_takers."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups={'abs': True})
- msg = ("Invalid subgroups {'always_takers': True, 'never_takers': False, 'abs': 5}. "
- "subgroups must be a dictionary with keys always_takers and never_takers.")
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups={'always_takers': True, 'never_takers': False, 'abs': 5})
- msg = ("Invalid subgroups {'always_takers': True}. "
- "subgroups must be a dictionary with keys always_takers and never_takers.")
- with pytest.raises(ValueError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups={'always_takers': True})
+ _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), subgroups={"abs": True})
+ msg = (
+ "Invalid subgroups {'always_takers': True, 'never_takers': False, 'abs': 5}. "
+ "subgroups must be a dictionary with keys always_takers and never_takers."
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ subgroups={"always_takers": True, "never_takers": False, "abs": 5},
+ )
+ msg = "Invalid subgroups {'always_takers': True}. subgroups must be a dictionary with keys always_takers and never_takers."
+ with pytest.raises(ValueError, match=msg):
+ _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), subgroups={"always_takers": True})
msg = r"subgroups\['always_takers'\] must be True or False. Got 5."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups={'always_takers': 5, 'never_takers': False})
+ _ = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ subgroups={"always_takers": 5, "never_takers": False},
+ )
msg = r"subgroups\['never_takers'\] must be True or False. Got 5."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- subgroups={'always_takers': True, 'never_takers': 5})
+ _ = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ subgroups={"always_takers": True, "never_takers": 5},
+ )
@pytest.mark.ci
@@ -603,45 +717,46 @@ def test_doubleml_exception_resampling():
msg = "The number of folds must be of int type. 1.5 of type was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=1.5)
- msg = ('The number of repetitions for the sample splitting must be of int type. '
- "1.5 of type <class 'float'> was passed.")
+ msg = "The number of repetitions for the sample splitting must be of int type. 1.5 of type <class 'float'> was passed."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, n_rep=1.5)
- msg = 'The number of folds must be positive. 0 was passed.'
+ msg = "The number of folds must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=0)
- msg = 'The number of repetitions for the sample splitting must be positive. 0 was passed.'
+ msg = "The number of repetitions for the sample splitting must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, n_rep=0)
- msg = 'draw_sample_splitting must be True or False. Got true.'
+ msg = "draw_sample_splitting must be True or False. Got true."
with pytest.raises(TypeError, match=msg):
- _ = DoubleMLPLR(dml_data, ml_l, ml_m, draw_sample_splitting='true')
+ _ = DoubleMLPLR(dml_data, ml_l, ml_m, draw_sample_splitting="true")
@pytest.mark.ci
def test_doubleml_exception_onefold():
- msg = 'n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold.'
+ msg = "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=1)
@pytest.mark.ci
def test_doubleml_exception_get_params():
- msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m.'
+ msg = "Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m."
with pytest.raises(ValueError, match=msg):
- dml_plr.get_params('ml_r')
- msg = 'Invalid nuisance learner ml_g. Valid nuisance learner ml_l or ml_m.'
+ dml_plr.get_params("ml_r")
+ msg = "Invalid nuisance learner ml_g. Valid nuisance learner ml_l or ml_m."
with pytest.raises(ValueError, match=msg):
- dml_plr.get_params('ml_g')
- msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m or ml_g.'
+ dml_plr.get_params("ml_g")
+ msg = "Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m or ml_g."
with pytest.raises(ValueError, match=msg):
- dml_plr_iv_type.get_params('ml_r')
+ dml_plr_iv_type.get_params("ml_r")
@pytest.mark.ci
def test_doubleml_exception_smpls():
- msg = ('Sample splitting not specified. '
- r'Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\).')
+ msg = (
+ "Sample splitting not specified. "
+ r"Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\)."
+ )
dml_plr_no_smpls = DoubleMLPLR(dml_data, ml_l, ml_m, draw_sample_splitting=False)
with pytest.raises(ValueError, match=msg):
_ = dml_plr_no_smpls.smpls
@@ -651,52 +766,44 @@ def test_doubleml_exception_smpls():
dml_pliv_cluster = DoubleMLPLIV(dml_cluster_data_pliv, ml_g, ml_m, ml_r)
smpls = dml_plr.smpls
- msg = ('For cluster data, all_smpls_cluster must be provided.')
+ msg = "For cluster data, all_smpls_cluster must be provided."
with pytest.raises(ValueError, match=msg):
_ = dml_pliv_cluster.set_sample_splitting(smpls)
all_smpls_cluster = copy.deepcopy(dml_pliv_cluster.smpls_cluster)
all_smpls_cluster.append(all_smpls_cluster[0])
- msg = ('Invalid samples provided. Number of repetitions for all_smpls and all_smpls_cluster must be the same.')
+ msg = "Invalid samples provided. Number of repetitions for all_smpls and all_smpls_cluster must be the same."
with pytest.raises(ValueError, match=msg):
- _ = dml_pliv_cluster.set_sample_splitting(
- all_smpls=dml_pliv_cluster.smpls,
- all_smpls_cluster=all_smpls_cluster)
+ _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster)
all_smpls_cluster = copy.deepcopy(dml_pliv_cluster.smpls_cluster)
all_smpls_cluster[0] = all_smpls_cluster[0][0]
- msg = ('Invalid samples provided. Number of folds for all_smpls and all_smpls_cluster must be the same.')
+ msg = "Invalid samples provided. Number of folds for all_smpls and all_smpls_cluster must be the same."
with pytest.raises(ValueError, match=msg):
- _ = dml_pliv_cluster.set_sample_splitting(
- all_smpls=dml_pliv_cluster.smpls,
- all_smpls_cluster=all_smpls_cluster)
+ _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster)
all_smpls_cluster = copy.deepcopy(dml_pliv_cluster.smpls_cluster)
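+ # appending an extra cluster index so the inner lists no longer form a partition of the cluster values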
all_smpls_cluster[0][0][1][1] = np.append(all_smpls_cluster[0][0][1][1], [11], axis=0)
- msg = ('Invalid cluster partition provided. At least one inner list does not form a partition.')
+ msg = "Invalid cluster partition provided. At least one inner list does not form a partition."
with pytest.raises(ValueError, match=msg):
- _ = dml_pliv_cluster.set_sample_splitting(
- all_smpls=dml_pliv_cluster.smpls,
- all_smpls_cluster=all_smpls_cluster)
+ _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster)
all_smpls_cluster = copy.deepcopy(dml_pliv_cluster.smpls_cluster)
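+ # overwriting an entry with a duplicate index value likewise destroys the partition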
all_smpls_cluster[0][0][1][1][1] = 11
- msg = ('Invalid cluster partition provided. At least one inner list does not form a partition.')
+ msg = "Invalid cluster partition provided. At least one inner list does not form a partition."
with pytest.raises(ValueError, match=msg):
- _ = dml_pliv_cluster.set_sample_splitting(
- all_smpls=dml_pliv_cluster.smpls,
- all_smpls_cluster=all_smpls_cluster)
+ _ = dml_pliv_cluster.set_sample_splitting(all_smpls=dml_pliv_cluster.smpls, all_smpls_cluster=all_smpls_cluster)
@pytest.mark.ci
def test_doubleml_exception_fit():
msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_plr.fit(n_jobs_cv='5')
- msg = 'store_predictions must be True or False. Got 1.'
+ dml_plr.fit(n_jobs_cv="5")
+ msg = "store_predictions must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr.fit(store_predictions=1)
- msg = 'store_models must be True or False. Got 1.'
+ msg = "store_models must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr.fit(store_models=1)
@@ -704,18 +811,18 @@ def test_doubleml_exception_fit():
@pytest.mark.ci
def test_doubleml_exception_bootstrap():
dml_plr_boot = DoubleMLPLR(dml_data, ml_l, ml_m)
- msg = r'Apply fit\(\) before bootstrap\(\).'
+ msg = r"Apply fit\(\) before bootstrap\(\)."
with pytest.raises(ValueError, match=msg):
dml_plr_boot.bootstrap()
dml_plr_boot.fit()
msg = 'Method must be "Bayes", "normal" or "wild". Got Gaussian.'
with pytest.raises(ValueError, match=msg):
- dml_plr_boot.bootstrap(method='Gaussian')
+ dml_plr_boot.bootstrap(method="Gaussian")
msg = "The number of bootstrap replications must be of int type. 500 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_plr_boot.bootstrap(n_rep_boot='500')
- msg = 'The number of bootstrap replications must be positive. 0 was passed.'
+ dml_plr_boot.bootstrap(n_rep_boot="500")
+ msg = "The number of bootstrap replications must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
dml_plr_boot.bootstrap(n_rep_boot=0)
@@ -725,21 +832,21 @@ def test_doubleml_exception_confint():
dml_plr_confint = DoubleMLPLR(dml_data, ml_l, ml_m)
dml_plr_confint.fit()
- msg = 'joint must be True or False. Got 1.'
+ msg = "joint must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr_confint.confint(joint=1)
msg = "The confidence level must be of float type. 5% of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_plr_confint.confint(level='5%')
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ dml_plr_confint.confint(level="5%")
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
- dml_plr_confint.confint(level=0.)
+ dml_plr_confint.confint(level=0.0)
dml_plr_confint_not_fitted = DoubleMLPLR(dml_data, ml_l, ml_m)
- msg = r'Apply fit\(\) before confint\(\).'
+ msg = r"Apply fit\(\) before confint\(\)."
with pytest.raises(ValueError, match=msg):
dml_plr_confint_not_fitted.confint()
- msg = r'Apply bootstrap\(\) before confint\(joint=True\).'
+ msg = r"Apply bootstrap\(\) before confint\(joint=True\)."
with pytest.raises(ValueError, match=msg):
dml_plr_confint.confint(joint=True)
dml_plr_confint.bootstrap()
@@ -751,15 +858,15 @@ def test_doubleml_exception_confint():
def test_doubleml_exception_p_adjust():
dml_plr_p_adjust = DoubleMLPLR(dml_data, ml_l, ml_m)
- msg = r'Apply fit\(\) before p_adjust\(\).'
+ msg = r"Apply fit\(\) before p_adjust\(\)."
with pytest.raises(ValueError, match=msg):
dml_plr_p_adjust.p_adjust()
dml_plr_p_adjust.fit()
msg = r'Apply bootstrap\(\) before p_adjust\("romano-wolf"\).'
with pytest.raises(ValueError, match=msg):
- dml_plr_p_adjust.p_adjust(method='romano-wolf')
+ dml_plr_p_adjust.p_adjust(method="romano-wolf")
dml_plr_p_adjust.bootstrap()
- p_val = dml_plr_p_adjust.p_adjust(method='romano-wolf')
+ p_val = dml_plr_p_adjust.p_adjust(method="romano-wolf")
assert isinstance(p_val, pd.DataFrame)
msg = "The p_adjust method must be of str type. 0.05 of type was passed."
@@ -769,74 +876,78 @@ def test_doubleml_exception_p_adjust():
@pytest.mark.ci
def test_doubleml_exception_tune():
- msg = r'Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m'
+ msg = r"Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m"
with pytest.raises(ValueError, match=msg):
dml_plr.tune([0.05, 0.5])
- msg = (r"Invalid param_grids {'ml_r': {'alpha': \[0.05, 0.5\]}}. "
- "param_grids must be a dictionary with keys ml_l and ml_m.")
+ msg = (
+ r"Invalid param_grids {'ml_r': {'alpha': \[0.05, 0.5\]}}. " "param_grids must be a dictionary with keys ml_l and ml_m."
+ )
with pytest.raises(ValueError, match=msg):
- dml_plr.tune({'ml_r': {'alpha': [0.05, 0.5]}})
+ dml_plr.tune({"ml_r": {"alpha": [0.05, 0.5]}})
- msg = r'Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m and ml_g'
+ msg = r"Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m and ml_g"
with pytest.raises(ValueError, match=msg):
dml_plr_iv_type.tune([0.05, 0.5])
- msg = (r"Invalid param_grids {'ml_g': {'alpha': \[0.05, 0.5\]}, 'ml_m': {'alpha': \[0.05, 0.5\]}}. "
- "param_grids must be a dictionary with keys ml_l and ml_m and ml_g.")
+ msg = (
+ r"Invalid param_grids {'ml_g': {'alpha': \[0.05, 0.5\]}, 'ml_m': {'alpha': \[0.05, 0.5\]}}. "
+ "param_grids must be a dictionary with keys ml_l and ml_m and ml_g."
+ )
with pytest.raises(ValueError, match=msg):
- dml_plr_iv_type.tune({'ml_g': {'alpha': [0.05, 0.5]},
- 'ml_m': {'alpha': [0.05, 0.5]}})
+ dml_plr_iv_type.tune({"ml_g": {"alpha": [0.05, 0.5]}, "ml_m": {"alpha": [0.05, 0.5]}})
- param_grids = {'ml_l': {'alpha': [0.05, 0.5]}, 'ml_m': {'alpha': [0.05, 0.5]}}
- msg = ('Invalid scoring_methods neg_mean_absolute_error. '
- 'scoring_methods must be a dictionary. '
- 'Valid keys are ml_l and ml_m.')
+ param_grids = {"ml_l": {"alpha": [0.05, 0.5]}, "ml_m": {"alpha": [0.05, 0.5]}}
+ msg = (
+ "Invalid scoring_methods neg_mean_absolute_error. scoring_methods must be a dictionary. Valid keys are ml_l and ml_m."
+ )
with pytest.raises(ValueError, match=msg):
- dml_plr.tune(param_grids, scoring_methods='neg_mean_absolute_error')
+ dml_plr.tune(param_grids, scoring_methods="neg_mean_absolute_error")
- msg = 'tune_on_folds must be True or False. Got 1.'
+ msg = "tune_on_folds must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr.tune(param_grids, tune_on_folds=1)
- msg = 'The number of folds used for tuning must be at least two. 1 was passed.'
+ msg = "The number of folds used for tuning must be at least two. 1 was passed."
with pytest.raises(ValueError, match=msg):
dml_plr.tune(param_grids, n_folds_tune=1)
msg = "The number of folds used for tuning must be of int type. 1.0 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_plr.tune(param_grids, n_folds_tune=1.)
+ dml_plr.tune(param_grids, n_folds_tune=1.0)
msg = 'search_mode must be "grid_search" or "randomized_search". Got gridsearch.'
with pytest.raises(ValueError, match=msg):
- dml_plr.tune(param_grids, search_mode='gridsearch')
+ dml_plr.tune(param_grids, search_mode="gridsearch")
- msg = 'The number of parameter settings sampled for the randomized search must be at least two. 1 was passed.'
+ msg = "The number of parameter settings sampled for the randomized search must be at least two. 1 was passed."
with pytest.raises(ValueError, match=msg):
dml_plr.tune(param_grids, n_iter_randomized_search=1)
- msg = ("The number of parameter settings sampled for the randomized search must be of int type. "
- "1.0 of type was passed.")
+ msg = (
+ "The number of parameter settings sampled for the randomized search must be of int type. "
+ "1.0 of type was passed."
+ )
with pytest.raises(TypeError, match=msg):
- dml_plr.tune(param_grids, n_iter_randomized_search=1.)
+ dml_plr.tune(param_grids, n_iter_randomized_search=1.0)
msg = "The number of CPUs used to fit the learners must be of int type. 5 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_plr.tune(param_grids, n_jobs_cv='5')
+ dml_plr.tune(param_grids, n_jobs_cv="5")
- msg = 'set_as_params must be True or False. Got 1.'
+ msg = "set_as_params must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr.tune(param_grids, set_as_params=1)
- msg = 'return_tune_res must be True or False. Got 1.'
+ msg = "return_tune_res must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_plr.tune(param_grids, return_tune_res=1)
@pytest.mark.ci
def test_doubleml_exception_set_ml_nuisance_params():
- msg = 'Invalid nuisance learner g. Valid nuisance learner ml_l or ml_m.'
+ msg = "Invalid nuisance learner g. Valid nuisance learner ml_l or ml_m."
with pytest.raises(ValueError, match=msg):
- dml_plr.set_ml_nuisance_params('g', 'd', {'alpha': 0.1})
- msg = 'Invalid treatment variable y. Valid treatment variable d.'
+ dml_plr.set_ml_nuisance_params("g", "d", {"alpha": 0.1})
+ msg = "Invalid treatment variable y. Valid treatment variable d."
with pytest.raises(ValueError, match=msg):
- dml_plr.set_ml_nuisance_params('ml_l', 'y', {'alpha': 0.1})
+ dml_plr.set_ml_nuisance_params("ml_l", "y", {"alpha": 0.1})
class _DummyNoSetParams:
@@ -868,13 +979,13 @@ def predict(self, X):
@pytest.mark.ci
def test_doubleml_exception_learner():
- err_msg_prefix = 'Invalid learner provided for ml_l: '
- warn_msg_prefix = 'Learner provided for ml_l is probably invalid: '
+ err_msg_prefix = "Invalid learner provided for ml_l: "
+ warn_msg_prefix = "Learner provided for ml_l is probably invalid: "
- msg = err_msg_prefix + 'provide an instance of a learner instead of a class.'
+ msg = err_msg_prefix + "provide an instance of a learner instead of a class."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(dml_data, Lasso, ml_m)
- msg = err_msg_prefix + r'BaseEstimator\(\) has no method .fit\(\).'
+ msg = err_msg_prefix + r"BaseEstimator\(\) has no method .fit\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLPLR(dml_data, BaseEstimator(), ml_m)
# msg = err_msg_prefix + r'_DummyNoSetParams\(\) has no method .set_params\(\).'
@@ -890,54 +1001,63 @@ def test_doubleml_exception_learner():
# ToDo: Currently for ml_l (and others) we only check whether the learner can be identified as regressor. However,
# we do not check whether it can instead be identified as classifier, which could be used to throw an error.
- msg = warn_msg_prefix + r'LogisticRegression\(\) is \(probably\) no regressor.'
+ msg = warn_msg_prefix + r"LogisticRegression\(\) is \(probably\) no regressor."
with pytest.warns(UserWarning, match=msg):
_ = DoubleMLPLR(dml_data, LogisticRegression(), Lasso())
# we allow classifiers for ml_m in PLR, but only for binary treatment variables
- msg = (r'The ml_m learner LogisticRegression\(\) was identified as classifier '
- 'but at least one treatment variable is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_m learner LogisticRegression\(\) was identified as classifier "
+ "but at least one treatment variable is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, Lasso(), LogisticRegression())
msg = r"For score = 'IV-type', learners ml_l and ml_g should be specified. Set ml_g = clone\(ml_l\)."
with pytest.warns(UserWarning, match=msg):
- _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=ml_m, score='IV-type')
+ _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=ml_m, score="IV-type")
msg = 'A learner ml_g has been provided for score = "partialling out" but will be ignored.'
with pytest.warns(UserWarning, match=msg):
- _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), ml_g=Lasso(), score='partialling out')
+ _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), ml_g=Lasso(), score="partialling out")
msg = "For score = 'IV-type', learners ml_l, ml_m, ml_r and ml_g need to be specified."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPLIV(dml_data_pliv, ml_l=ml_l, ml_m=ml_m, ml_r=ml_r,
- score='IV-type')
+ _ = DoubleMLPLIV(dml_data_pliv, ml_l=ml_l, ml_m=ml_m, ml_r=ml_r, score="IV-type")
msg = 'A learner ml_g has been provided for score = "partialling out" but will be ignored.'
with pytest.warns(UserWarning, match=msg):
- _ = DoubleMLPLIV(dml_data_pliv, ml_l=Lasso(), ml_m=Lasso(), ml_r=Lasso(), ml_g=Lasso(), score='partialling out')
+ _ = DoubleMLPLIV(dml_data_pliv, ml_l=Lasso(), ml_m=Lasso(), ml_r=Lasso(), ml_g=Lasso(), score="partialling out")
# we allow classifiers for ml_g for binary treatment variables in IRM
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLIRM(dml_data_irm, LogisticRegression(), LogisticRegression())
# we allow classifiers for ml_g for binary treatment variables in IRM
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLIIVM(dml_data_iivm, LogisticRegression(), LogisticRegression(), LogisticRegression())
# we allow classifiers for ml_g for binary treatment variables in DID
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLDID(dml_data_did, LogisticRegression(), LogisticRegression())
# we allow classifiers for ml_g for binary treatment variables in DIDCS
- msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier '
- 'but the outcome variable is not binary with values 0 and 1.')
+ msg = (
+ r"The ml_g learner LogisticRegression\(\) was identified as classifier "
+ "but the outcome variable is not binary with values 0 and 1."
+ )
with pytest.raises(ValueError, match=msg):
_ = DoubleMLDIDCS(dml_data_did_cs, LogisticRegression(), LogisticRegression())
@@ -945,13 +1065,17 @@ def test_doubleml_exception_learner():
# it then predicts labels and therefore an exception will be thrown
log_reg = LogisticRegression()
log_reg._estimator_type = None
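+ # clearing _estimator_type hides the classifier from the regressor/classifier checks, so predict() (labels) is used instead of predict_proba()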
- msg = (r'Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) neither a regressor '
- 'nor a classifier. Method predict is used for prediction.')
+ msg = (
+ r"Learner provided for ml_m is probably invalid: LogisticRegression\(\) is \(probably\) neither a regressor "
+ "nor a classifier. Method predict is used for prediction."
+ )
with pytest.warns(UserWarning, match=msg):
dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg)
- msg = (r'For the binary variable d, predictions obtained with the ml_m learner LogisticRegression\(\) '
- 'are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not '
- 'labels are predicted.')
+ msg = (
+ r"For the binary variable d, predictions obtained with the ml_m learner LogisticRegression\(\) "
+ "are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not "
+ "labels are predicted."
+ )
with pytest.raises(ValueError, match=msg):
dml_plr_hidden_classifier.fit()
@@ -960,32 +1084,40 @@ def test_doubleml_exception_learner():
# whether predict() or predict_proba() is being called can also be manipulated via the unrelated max_iter variable
log_reg = LogisticRegressionManipulatedPredict()
log_reg._estimator_type = None
- msg = (r'Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) '
- 'neither a regressor nor a classifier. Method predict is used for prediction.')
+ msg = (
+ r"Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) "
+ "neither a regressor nor a classifier. Method predict is used for prediction."
+ )
with pytest.warns(UserWarning, match=msg):
- dml_irm_hidden_classifier = DoubleMLIRM(dml_data_irm_binary_outcome,
- log_reg, LogisticRegression())
- msg = (r'For the binary variable y, predictions obtained with the ml_g learner '
- r'LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure '
- 'that for classifiers probabilities and not labels are predicted.')
+ dml_irm_hidden_classifier = DoubleMLIRM(dml_data_irm_binary_outcome, log_reg, LogisticRegression())
+ msg = (
+ r"For the binary variable y, predictions obtained with the ml_g learner "
+ r"LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure "
+ "that for classifiers probabilities and not labels are predicted."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_hidden_classifier.fit()
with pytest.raises(ValueError, match=msg):
- dml_irm_hidden_classifier.set_ml_nuisance_params('ml_g0', 'd', {'max_iter': 314})
+ dml_irm_hidden_classifier.set_ml_nuisance_params("ml_g0", "d", {"max_iter": 314})
dml_irm_hidden_classifier.fit()
- msg = (r'Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) '
- 'neither a regressor nor a classifier. Method predict is used for prediction.')
+ msg = (
+ r"Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) "
+ "neither a regressor nor a classifier. Method predict is used for prediction."
+ )
with pytest.warns(UserWarning, match=msg):
- dml_iivm_hidden_classifier = DoubleMLIIVM(dml_data_iivm_binary_outcome,
- log_reg, LogisticRegression(), LogisticRegression())
- msg = (r'For the binary variable y, predictions obtained with the ml_g learner '
- r'LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure '
- 'that for classifiers probabilities and not labels are predicted.')
+ dml_iivm_hidden_classifier = DoubleMLIIVM(
+ dml_data_iivm_binary_outcome, log_reg, LogisticRegression(), LogisticRegression()
+ )
+ msg = (
+ r"For the binary variable y, predictions obtained with the ml_g learner "
+ r"LogisticRegressionManipulatedPredict\(\) are also observed to be binary with values 0 and 1. Make sure "
+ "that for classifiers probabilities and not labels are predicted."
+ )
with pytest.raises(ValueError, match=msg):
dml_iivm_hidden_classifier.fit()
with pytest.raises(ValueError, match=msg):
- dml_iivm_hidden_classifier.set_ml_nuisance_params('ml_g0', 'd', {'max_iter': 314})
+ dml_iivm_hidden_classifier.set_ml_nuisance_params("ml_g0", "d", {"max_iter": 314})
dml_iivm_hidden_classifier.fit()
@@ -995,7 +1127,7 @@ def test_doubleml_exception_and_warning_learner():
# msg = err_msg_prefix + r'_DummyNoClassifier\(\) has no method .predict\(\).'
with pytest.raises(TypeError):
_ = DoubleMLPLR(dml_data, _DummyNoClassifier(), Lasso())
- msg = 'Invalid learner provided for ml_m: ' + r'Lasso\(\) has no method .predict_proba\(\).'
+ msg = "Invalid learner provided for ml_m: " + r"Lasso\(\) has no method .predict_proba\(\)."
with pytest.raises(TypeError, match=msg):
_ = DoubleMLIRM(dml_data_irm, Lasso(), Lasso())
@@ -1007,11 +1139,11 @@ def test_doubleml_sensitivity_not_yet_implemented():
dml_pliv = DoubleMLPLIV(dml_data_pliv, ml_g, ml_m, ml_r)
dml_pliv.fit()
- msg = 'Sensitivity analysis is not implemented for this model.'
+ msg = "Sensitivity analysis is not implemented for this model."
with pytest.raises(NotImplementedError, match=msg):
_ = dml_pliv.sensitivity_analysis()
- msg = 'Sensitivity analysis not yet implemented for DoubleMLPLIV.'
+ msg = "Sensitivity analysis not yet implemented for DoubleMLPLIV."
with pytest.raises(NotImplementedError, match=msg):
_ = dml_pliv.sensitivity_benchmark(benchmarking_set=["X1"])
@@ -1026,7 +1158,7 @@ def test_doubleml_sensitivity_inputs():
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=1)
- msg = r'cf_y must be in \[0,1\). 1.0 was passed.'
+ msg = r"cf_y must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=1.0)
@@ -1035,7 +1167,7 @@ def test_doubleml_sensitivity_inputs():
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=1)
- msg = r'cf_d must be in \[0,1\). 1.0 was passed.'
+ msg = r"cf_d must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=1.0)
@@ -1048,7 +1180,7 @@ def test_doubleml_sensitivity_inputs():
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho="1")
- msg = r'The absolute value of rho must be in \[0,1\]. 1.1 was passed.'
+ msg = r"The absolute value of rho must be in \[0,1\]. 1.1 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1)
@@ -1057,11 +1189,11 @@ def test_doubleml_sensitivity_inputs():
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1)
- msg = r'The confidence level must be in \(0,1\). 1.0 was passed.'
+ msg = r"The confidence level must be in \(0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0)
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0)
@@ -1088,28 +1220,29 @@ def test_doubleml_sensitivity_inputs():
_ = dml_irm.sensitivity_plot(idx_treatment=1)
# test setter
- msg = ("_sensitivity_element_est must return sensitivity elements in a dict. "
- "Got type .")
+ msg = "_sensitivity_element_est must return sensitivity elements in a dict. Got type ."
with pytest.raises(TypeError, match=msg):
_ = dml_irm._set_sensitivity_elements(sensitivity_elements=1, i_rep=0, i_treat=0)
- sensitivity_elements = dict({'sigma2': 1})
+ sensitivity_elements = dict({"sigma2": 1})
with pytest.raises(ValueError):
_ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
# test variances
- sensitivity_elements = dict({'sigma2': 1.0, 'nu2': -2.4, 'psi_sigma2': 1.0, 'psi_nu2': 1.0, 'riesz_rep': 1.0})
+ sensitivity_elements = dict({"sigma2": 1.0, "nu2": -2.4, "psi_sigma2": 1.0, "psi_nu2": 1.0, "riesz_rep": 1.0})
_ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
- msg = ('sensitivity_elements sigma2 and nu2 have to be positive. '
- r'Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. '
- r'Most likely this is due to low quality learners \(especially propensity scores\).')
+ msg = (
+ "sensitivity_elements sigma2 and nu2 have to be positive. "
+ r"Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. "
+ r"Most likely this is due to low quality learners \(especially propensity scores\)."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm.sensitivity_analysis()
def test_doubleml_sensitivity_summary():
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
- msg = r'Apply sensitivity_analysis\(\) before sensitivity_summary.'
+ msg = r"Apply sensitivity_analysis\(\) before sensitivity_summary."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_summary
@@ -1128,10 +1261,12 @@ def test_doubleml_sensitivity_benchmark():
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_benchmark(benchmarking_set=[])
- msg = (r"benchmarking_set must be a subset of features \['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', "
- r"'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20'\]. \['test_var'\] was passed.")
+ msg = (
+ r"benchmarking_set must be a subset of features \['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', "
+ r"'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20'\]. \['test_var'\] was passed."
+ )
with pytest.raises(ValueError, match=msg):
- _ = dml_irm.sensitivity_benchmark(benchmarking_set=['test_var'])
+ _ = dml_irm.sensitivity_benchmark(benchmarking_set=["test_var"])
@pytest.mark.ci
@@ -1139,7 +1274,7 @@ def test_doubleml_sensitivity_plot_input():
dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
dml_irm.fit()
- msg = (r'Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. ')
+ msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. "
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_plot()
@@ -1153,26 +1288,26 @@ def test_doubleml_sensitivity_plot_input():
_ = dml_irm.sensitivity_plot(benchmarks="True")
msg = r"benchmarks has to be a dictionary with keys cf_y, cf_d and name. Got dict_keys\(\['cf_y', 'cf_d'\]\)."
with pytest.raises(ValueError, match=msg):
- _ = dml_irm.sensitivity_plot(benchmarks={'cf_y': 0.1, 'cf_d': 0.15})
+ _ = dml_irm.sensitivity_plot(benchmarks={"cf_y": 0.1, "cf_d": 0.15})
msg = r"benchmarks has to be a dictionary with values of same length. Got \[1, 2, 2\]."
with pytest.raises(ValueError, match=msg):
- _ = dml_irm.sensitivity_plot(benchmarks={'cf_y': [0.1], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']})
+ _ = dml_irm.sensitivity_plot(benchmarks={"cf_y": [0.1], "cf_d": [0.15, 0.2], "name": ["test", "test2"]})
msg = "benchmarks cf_y must be of float type. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
- _ = dml_irm.sensitivity_plot(benchmarks={'cf_y': [0.1, 2], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']})
- msg = r'benchmarks cf_y must be in \[0,1\). 1.0 was passed.'
+ _ = dml_irm.sensitivity_plot(benchmarks={"cf_y": [0.1, 2], "cf_d": [0.15, 0.2], "name": ["test", "test2"]})
+ msg = r"benchmarks cf_y must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
- _ = dml_irm.sensitivity_plot(benchmarks={'cf_y': [0.1, 1.0], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']})
+ _ = dml_irm.sensitivity_plot(benchmarks={"cf_y": [0.1, 1.0], "cf_d": [0.15, 0.2], "name": ["test", "test2"]})
msg = "benchmarks name must be of string type. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
- _ = dml_irm.sensitivity_plot(benchmarks={'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': [2, 2]})
+ _ = dml_irm.sensitivity_plot(benchmarks={"cf_y": [0.1, 0.2], "cf_d": [0.15, 0.2], "name": [2, 2]})
msg = "value must be a string. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_plot(value=2)
msg = "Invalid value test. Valid values theta or ci."
with pytest.raises(ValueError, match=msg):
- _ = dml_irm.sensitivity_plot(value='test')
+ _ = dml_irm.sensitivity_plot(value="test")
msg = "fill has to be boolean. True of type was passed."
with pytest.raises(TypeError, match=msg):
@@ -1190,12 +1325,12 @@ def test_doubleml_sensitivity_plot_input():
_ = dml_irm.sensitivity_plot(grid_bounds=(0.15, 1))
with pytest.raises(TypeError, match=msg):
_ = dml_irm.sensitivity_plot(grid_bounds=(1, 0.15))
- msg = r'grid_bounds must be in \(0,1\). 1.0 was passed.'
+ msg = r"grid_bounds must be in \(0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_plot(grid_bounds=(1.0, 0.15))
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_plot(grid_bounds=(0.15, 1.0))
- msg = r'grid_bounds must be in \(0,1\). 0.0 was passed.'
+ msg = r"grid_bounds must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_irm.sensitivity_plot(grid_bounds=(0.0, 0.15))
with pytest.raises(ValueError, match=msg):
@@ -1206,16 +1341,22 @@ def test_doubleml_sensitivity_plot_input():
def test_doubleml_cluster_not_yet_implemented():
dml_pliv_cluster = DoubleMLPLIV(dml_cluster_data_pliv, ml_g, ml_m, ml_r)
dml_pliv_cluster.fit()
- msg = 'bootstrap not yet implemented with clustering.'
+ msg = "bootstrap not yet implemented with clustering."
with pytest.raises(NotImplementedError, match=msg):
_ = dml_pliv_cluster.bootstrap()
df = dml_cluster_data_pliv.data.copy()
- df['cluster_var_k'] = df['cluster_var_i'] + df['cluster_var_j'] - 2
- dml_cluster_data_multiway = DoubleMLClusterData(df, y_col='Y', d_cols='D', x_cols=['X1', 'X5'], z_cols='Z',
- cluster_cols=['cluster_var_i', 'cluster_var_j', 'cluster_var_k'])
+ df["cluster_var_k"] = df["cluster_var_i"] + df["cluster_var_j"] - 2
+ dml_cluster_data_multiway = DoubleMLClusterData(
+ df,
+ y_col="Y",
+ d_cols="D",
+ x_cols=["X1", "X5"],
+ z_cols="Z",
+ cluster_cols=["cluster_var_i", "cluster_var_j", "cluster_var_k"],
+ )
assert dml_cluster_data_multiway.n_cluster_vars == 3
- msg = r'Multi-way \(n_ways > 2\) clustering not yet implemented.'
+ msg = r"Multi-way \(n_ways > 2\) clustering not yet implemented."
with pytest.raises(NotImplementedError, match=msg):
_ = DoubleMLPLIV(dml_cluster_data_multiway, ml_g, ml_m, ml_r)
@@ -1238,17 +1379,17 @@ def predict(self, X):
@pytest.mark.ci
def test_doubleml_nan_prediction():
- msg = r'Predictions from learner LassoWithNanPred\(\) for ml_l are not finite.'
+ msg = r"Predictions from learner LassoWithNanPred\(\) for ml_l are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, LassoWithNanPred(), ml_m).fit()
- msg = r'Predictions from learner LassoWithInfPred\(\) for ml_l are not finite.'
+ msg = r"Predictions from learner LassoWithInfPred\(\) for ml_l are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, LassoWithInfPred(), ml_m).fit()
- msg = r'Predictions from learner LassoWithNanPred\(\) for ml_m are not finite.'
+ msg = r"Predictions from learner LassoWithNanPred\(\) for ml_m are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, LassoWithNanPred()).fit()
- msg = r'Predictions from learner LassoWithInfPred\(\) for ml_m are not finite.'
+ msg = r"Predictions from learner LassoWithInfPred\(\) for ml_m are not finite."
with pytest.raises(ValueError, match=msg):
_ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit()
@@ -1258,145 +1399,114 @@ def test_doubleml_warning_blp():
n = 5
np.random.seed(42)
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
- random_signal = np.random.normal(0, 1, size=(n, ))
+ random_signal = np.random.normal(0, 1, size=(n,))
blp = DoubleMLBLP(random_signal, random_basis)
blp.fit()
- msg = 'Returning pointwise confidence intervals for basis coefficients.'
+ msg = "Returning pointwise confidence intervals for basis coefficients."
with pytest.warns(UserWarning, match=msg):
_ = blp.confint(joint=True)
@pytest.mark.ci
def test_doubleml_exception_gate():
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5)
+ dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5)
dml_irm_obj.fit()
msg = "Groups must be of DataFrame type. Groups of type was passed."
with pytest.raises(TypeError, match=msg):
dml_irm_obj.gate(groups=2)
groups = pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3)))
- msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). '
- 'Alternatively, groups should only contain one column.')
+ msg = (
+ r"Columns of groups must be of bool type or int type \(dummy coded\). "
+ "Alternatively, groups should only contain one column."
+ )
with pytest.raises(TypeError, match=msg):
dml_irm_obj.gate(groups=groups)
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATTE')
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+ )
dml_irm_obj.fit()
groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs))
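+ # gate() is only implemented for score="ATE", so the ATTE model must raise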
- msg = 'Invalid score ATTE. Valid score ATE.'
+ msg = "Invalid score ATTE. Valid score ATE."
with pytest.raises(ValueError, match=msg):
dml_irm_obj.gate(groups=groups)
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATE',
- n_rep=2)
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+ )
dml_irm_obj.fit()
- msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+ msg = "Only implemented for one repetition. Number of repetitions is 2."
with pytest.raises(NotImplementedError, match=msg):
dml_irm_obj.gate(groups=groups)
@pytest.mark.ci
def test_doubleml_exception_cate():
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATTE')
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+ )
dml_irm_obj.fit()
- msg = 'Invalid score ATTE. Valid score ATE.'
+ msg = "Invalid score ATTE. Valid score ATE."
with pytest.raises(ValueError, match=msg):
dml_irm_obj.cate(basis=2)
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATE',
- n_rep=2)
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+ )
dml_irm_obj.fit()
- msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+ msg = "Only implemented for one repetition. Number of repetitions is 2."
with pytest.raises(NotImplementedError, match=msg):
dml_irm_obj.cate(basis=2)
@pytest.mark.ci
def test_doubleml_exception_plr_cate():
- dml_plr_obj = DoubleMLPLR(dml_data,
- ml_l=Lasso(),
- ml_m=Lasso(),
- n_folds=2,
- n_rep=2)
+ dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2, n_rep=2)
dml_plr_obj.fit()
- msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+ msg = "Only implemented for one repetition. Number of repetitions is 2."
with pytest.raises(NotImplementedError, match=msg):
dml_plr_obj.cate(basis=2)
- dml_plr_obj = DoubleMLPLR(dml_data,
- ml_l=Lasso(),
- ml_m=Lasso(),
- n_folds=2)
+ dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2)
dml_plr_obj.fit(store_predictions=False)
- msg = r'predictions are None. Call .fit\(store_predictions=True\) to store the predictions.'
+ msg = r"predictions are None. Call .fit\(store_predictions=True\) to store the predictions."
with pytest.raises(ValueError, match=msg):
dml_plr_obj.cate(basis=2)
- dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=['d', 'X1'])
- dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat,
- ml_l=Lasso(),
- ml_m=Lasso(),
- n_folds=2)
+ dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=["d", "X1"])
+ dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat, ml_l=Lasso(), ml_m=Lasso(), n_folds=2)
dml_plr_obj_multiple.fit()
- msg = 'Only implemented for single treatment. Number of treatments is 2.'
+ msg = "Only implemented for single treatment. Number of treatments is 2."
with pytest.raises(NotImplementedError, match=msg):
dml_plr_obj_multiple.cate(basis=2)
@pytest.mark.ci
def test_doubleml_exception_plr_gate():
- dml_plr_obj = DoubleMLPLR(dml_data,
- ml_l=Lasso(),
- ml_m=Lasso(),
- n_folds=2,
- n_rep=1)
+ dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2, n_rep=1)
dml_plr_obj.fit()
msg = "Groups must be of DataFrame type. Groups of type was passed."
with pytest.raises(TypeError, match=msg):
dml_plr_obj.gate(groups=2)
- msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). '
- 'Alternatively, groups should only contain one column.')
+ msg = (
+ r"Columns of groups must be of bool type or int type \(dummy coded\). "
+ "Alternatively, groups should only contain one column."
+ )
with pytest.raises(TypeError, match=msg):
dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3))))
@pytest.mark.ci
def test_double_ml_exception_evaluate_learner():
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATTE')
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+ )
- msg = r'Apply fit\(\) before evaluate_learners\(\).'
+ msg = r"Apply fit\(\) before evaluate_learners\(\)."
with pytest.raises(ValueError, match=msg):
dml_irm_obj.evaluate_learners()
@@ -1406,26 +1516,22 @@ def test_double_ml_exception_evaluate_learner():
with pytest.raises(TypeError, match=msg):
dml_irm_obj.evaluate_learners(metric="mse")
- msg = (r"The learners have to be a subset of \['ml_g0', 'ml_g1', 'ml_m'\]. "
- r"Learners \['ml_g', 'ml_m'\] provided.")
+ msg = r"The learners have to be a subset of \['ml_g0', 'ml_g1', 'ml_m'\]. " r"Learners \['ml_g', 'ml_m'\] provided."
with pytest.raises(ValueError, match=msg):
- dml_irm_obj.evaluate_learners(learners=['ml_g', 'ml_m'])
+ dml_irm_obj.evaluate_learners(learners=["ml_g", "ml_m"])
- msg = 'Evaluation from learner ml_g0 is not finite.'
+ msg = "Evaluation from learner ml_g0 is not finite."
def eval_fct(y_pred, y_true):
return np.nan
+
with pytest.raises(ValueError, match=msg):
dml_irm_obj.evaluate_learners(metric=eval_fct)
@pytest.mark.ci
def test_doubleml_exception_policytree():
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5)
+ dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5)
dml_irm_obj.fit()
msg = "Covariates must be of DataFrame type. Covariates of type was passed."
@@ -1433,114 +1539,107 @@ def test_doubleml_exception_policytree():
dml_irm_obj.policy_tree(features=2)
msg = "Depth must be larger or equal to 0. -1 was passed."
with pytest.raises(ValueError, match=msg):
- dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))),
- depth=-1)
+ dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))), depth=-1)
msg = "Depth must be an integer. 0.1 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))),
- depth=.1)
+ dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))), depth=0.1)
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATTE')
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+ )
dml_irm_obj.fit()
- msg = 'Invalid score ATTE. Valid score ATE.'
+ msg = "Invalid score ATTE. Valid score ATE."
with pytest.raises(ValueError, match=msg):
dml_irm_obj.policy_tree(features=2, depth=1)
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATE',
- n_rep=2)
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+ )
dml_irm_obj.fit()
- msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+ msg = "Only implemented for one repetition. Number of repetitions is 2."
with pytest.raises(NotImplementedError, match=msg):
dml_irm_obj.policy_tree(features=2, depth=1)
@pytest.mark.ci
def test_double_ml_external_predictions():
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATE',
- n_rep=2)
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+ )
msg = "external_predictions must be a dictionary. ml_m of type was passed."
with pytest.raises(TypeError, match=msg):
dml_irm_obj.fit(external_predictions="ml_m")
- dml_irm_obj = DoubleMLIRM(dml_data_irm,
- ml_g=Lasso(),
- ml_m=LogisticRegression(),
- trimming_threshold=0.05,
- n_folds=5,
- score='ATE',
- n_rep=1)
+ dml_irm_obj = DoubleMLIRM(
+ dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=1
+ )
- predictions = {'d': 'test', 'd_f': 'test'}
- msg = (r"Invalid external_predictions. Invalid treatment variable in \['d', 'd_f'\]. "
- "Valid treatment variables d.")
+ predictions = {"d": "test", "d_f": "test"}
+ msg = r"Invalid external_predictions. Invalid treatment variable in \['d', 'd_f'\]. " "Valid treatment variables d."
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': 'test'}
- msg = ("external_predictions must be a nested dictionary. "
- "For treatment d a value of type was passed.")
+ predictions = {"d": "test"}
+ msg = "external_predictions must be a nested dictionary. For treatment d a value of type was passed."
with pytest.raises(TypeError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_f': 'test'}}
- msg = ("Invalid external_predictions. "
- r"Invalid nuisance learner for treatment d in \['ml_f'\]. "
- "Valid nuisance learners ml_g0 or ml_g1 or ml_m.")
+ predictions = {"d": {"ml_f": "test"}}
+ msg = (
+ "Invalid external_predictions. "
+ r"Invalid nuisance learner for treatment d in \['ml_f'\]. "
+ "Valid nuisance learners ml_g0 or ml_g1 or ml_m."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}}
- msg = ("Invalid external_predictions. "
- r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. "
- "Valid nuisance learners ml_g0 or ml_g1 or ml_m.")
+ predictions = {"d": {"ml_m": "test", "ml_f": "test"}}
+ msg = (
+ "Invalid external_predictions. "
+ r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. "
+ "Valid nuisance learners ml_g0 or ml_g1 or ml_m."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_m': 'test'}}
- msg = ("Invalid external_predictions. "
- "The values of the nested list must be a numpy array. "
- "Invalid predictions for treatment d and learner ml_m. "
- "Object of type was passed.")
+ predictions = {"d": {"ml_m": "test"}}
+ msg = (
+ "Invalid external_predictions. "
+ "The values of the nested list must be a numpy array. "
+ "Invalid predictions for treatment d and learner ml_m. "
+ "Object of type was passed."
+ )
with pytest.raises(TypeError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_m': np.array([0])}}
- msg = ('Invalid external_predictions. '
- r'The supplied predictions have to be of shape \(100, 1\). '
- 'Invalid predictions for treatment d and learner ml_m. '
- r'Predictions of shape \(1,\) passed.')
+ predictions = {"d": {"ml_m": np.array([0])}}
+ msg = (
+ "Invalid external_predictions. "
+ r"The supplied predictions have to be of shape \(100, 1\). "
+ "Invalid predictions for treatment d and learner ml_m. "
+ r"Predictions of shape \(1,\) passed."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_m': np.zeros(100)}}
- msg = ('Invalid external_predictions. '
- r'The supplied predictions have to be of shape \(100, 1\). '
- 'Invalid predictions for treatment d and learner ml_m. '
- r'Predictions of shape \(100,\) passed.')
+ predictions = {"d": {"ml_m": np.zeros(100)}}
+ msg = (
+ "Invalid external_predictions. "
+ r"The supplied predictions have to be of shape \(100, 1\). "
+ "Invalid predictions for treatment d and learner ml_m. "
+ r"Predictions of shape \(100,\) passed."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
- predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}}
- msg = ('Invalid external_predictions. '
- r'The supplied predictions have to be of shape \(100, 1\). '
- 'Invalid predictions for treatment d and learner ml_m. '
- r'Predictions of shape \(5, 3\) passed.')
+ predictions = {"d": {"ml_m": np.ones(shape=(5, 3))}}
+ msg = (
+ "Invalid external_predictions. "
+ r"The supplied predictions have to be of shape \(100, 1\). "
+ "Invalid predictions for treatment d and learner ml_m. "
+ r"Predictions of shape \(5, 3\) passed."
+ )
with pytest.raises(ValueError, match=msg):
dml_irm_obj.fit(external_predictions=predictions)
diff --git a/doubleml/tests/test_exceptions_ext_preds.py b/doubleml/tests/test_exceptions_ext_preds.py
index 4a61361d4..3f6002825 100644
--- a/doubleml/tests/test_exceptions_ext_preds.py
+++ b/doubleml/tests/test_exceptions_ext_preds.py
@@ -1,9 +1,9 @@
import pytest
-from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLIRM, DoubleMLData
-from doubleml.datasets import make_irm_data
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from doubleml import DoubleMLCVAR, DoubleMLData, DoubleMLIRM, DoubleMLQTE
+from doubleml.datasets import make_irm_data
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame")
ext_predictions = {"d": {}}
diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py
index 0a447420e..24810b680 100644
--- a/doubleml/tests/test_framework.py
+++ b/doubleml/tests/test_framework.py
@@ -1,28 +1,26 @@
-import pytest
import numpy as np
import pandas as pd
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
from doubleml.datasets import make_irm_data
-from doubleml.irm.irm import DoubleMLIRM
from doubleml.double_ml_framework import DoubleMLFramework, concat
-from ._utils import generate_dml_dict
+from doubleml.irm.irm import DoubleMLIRM
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from ._utils import generate_dml_dict
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_thetas(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_framework_fixture(n_rep, n_thetas):
n_obs = 100
@@ -33,13 +31,13 @@ def dml_framework_fixture(n_rep, n_thetas):
dml_framework_obj = DoubleMLFramework(doubleml_dict)
ci = dml_framework_obj.confint(joint=False, level=0.95)
- dml_framework_obj.bootstrap(method='normal')
+ dml_framework_obj.bootstrap(method="normal")
ci_joint = dml_framework_obj.confint(joint=True, level=0.95)
# add objects
dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj
ci_add_obj = dml_framework_obj_add_obj.confint(joint=False, level=0.95)
- dml_framework_obj_add_obj.bootstrap(method='normal')
+ dml_framework_obj_add_obj.bootstrap(method="normal")
ci_joint_add_obj = dml_framework_obj_add_obj.confint(joint=True, level=0.95)
    # subtract objects
@@ -49,114 +47,100 @@ def dml_framework_fixture(n_rep, n_thetas):
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2
ci_sub_obj = dml_framework_obj_sub_obj.confint(joint=False, level=0.95)
- dml_framework_obj_sub_obj.bootstrap(method='normal')
+ dml_framework_obj_sub_obj.bootstrap(method="normal")
ci_joint_sub_obj = dml_framework_obj_sub_obj.confint(joint=True, level=0.95)
# multiply objects
dml_framework_obj_mul_obj = dml_framework_obj * 2
ci_mul_obj = dml_framework_obj_mul_obj.confint(joint=False, level=0.95)
- dml_framework_obj_mul_obj.bootstrap(method='normal')
+ dml_framework_obj_mul_obj.bootstrap(method="normal")
ci_joint_mul_obj = dml_framework_obj_mul_obj.confint(joint=True, level=0.95)
# concat objects
dml_framework_obj_concat = concat([dml_framework_obj, dml_framework_obj])
ci_concat = dml_framework_obj_concat.confint(joint=False, level=0.95)
- dml_framework_obj_concat.bootstrap(method='normal')
+ dml_framework_obj_concat.bootstrap(method="normal")
ci_joint_concat = dml_framework_obj_concat.confint(joint=True, level=0.95)
result_dict = {
- 'dml_dict': doubleml_dict,
- 'dml_dict_2': doubleml_dict_2,
- 'dml_framework_obj': dml_framework_obj,
- 'dml_framework_obj_add_obj': dml_framework_obj_add_obj,
- 'dml_framework_obj_sub_obj': dml_framework_obj_sub_obj,
- 'dml_framework_obj_mul_obj': dml_framework_obj_mul_obj,
- 'dml_framework_obj_concat': dml_framework_obj_concat,
- 'ci': ci,
- 'ci_add_obj': ci_add_obj,
- 'ci_sub_obj': ci_sub_obj,
- 'ci_mul_obj': ci_mul_obj,
- 'ci_concat': ci_concat,
- 'ci_joint': ci_joint,
- 'ci_joint_add_obj': ci_joint_add_obj,
- 'ci_joint_sub_obj': ci_joint_sub_obj,
- 'ci_joint_mul_obj': ci_joint_mul_obj,
- 'ci_joint_concat': ci_joint_concat,
+ "dml_dict": doubleml_dict,
+ "dml_dict_2": doubleml_dict_2,
+ "dml_framework_obj": dml_framework_obj,
+ "dml_framework_obj_add_obj": dml_framework_obj_add_obj,
+ "dml_framework_obj_sub_obj": dml_framework_obj_sub_obj,
+ "dml_framework_obj_mul_obj": dml_framework_obj_mul_obj,
+ "dml_framework_obj_concat": dml_framework_obj_concat,
+ "ci": ci,
+ "ci_add_obj": ci_add_obj,
+ "ci_sub_obj": ci_sub_obj,
+ "ci_mul_obj": ci_mul_obj,
+ "ci_concat": ci_concat,
+ "ci_joint": ci_joint,
+ "ci_joint_add_obj": ci_joint_add_obj,
+ "ci_joint_sub_obj": ci_joint_sub_obj,
+ "ci_joint_mul_obj": ci_joint_mul_obj,
+ "ci_joint_concat": ci_joint_concat,
}
return result_dict
@pytest.mark.ci
def test_dml_framework_theta(dml_framework_fixture):
+ assert np.allclose(dml_framework_fixture["dml_framework_obj"].all_thetas, dml_framework_fixture["dml_dict"]["all_thetas"])
assert np.allclose(
- dml_framework_fixture['dml_framework_obj'].all_thetas,
- dml_framework_fixture['dml_dict']['all_thetas']
+ dml_framework_fixture["dml_framework_obj_add_obj"].all_thetas,
+ dml_framework_fixture["dml_dict"]["all_thetas"] + dml_framework_fixture["dml_dict"]["all_thetas"],
)
assert np.allclose(
- dml_framework_fixture['dml_framework_obj_add_obj'].all_thetas,
- dml_framework_fixture['dml_dict']['all_thetas'] + dml_framework_fixture['dml_dict']['all_thetas']
+ dml_framework_fixture["dml_framework_obj_sub_obj"].all_thetas,
+ dml_framework_fixture["dml_dict"]["all_thetas"] - dml_framework_fixture["dml_dict_2"]["all_thetas"],
)
assert np.allclose(
- dml_framework_fixture['dml_framework_obj_sub_obj'].all_thetas,
- dml_framework_fixture['dml_dict']['all_thetas'] - dml_framework_fixture['dml_dict_2']['all_thetas']
+ dml_framework_fixture["dml_framework_obj_mul_obj"].all_thetas, 2 * dml_framework_fixture["dml_dict"]["all_thetas"]
)
assert np.allclose(
- dml_framework_fixture['dml_framework_obj_mul_obj'].all_thetas,
- 2*dml_framework_fixture['dml_dict']['all_thetas']
- )
- assert np.allclose(
- dml_framework_fixture['dml_framework_obj_concat'].all_thetas,
- np.vstack((dml_framework_fixture['dml_dict']['all_thetas'], dml_framework_fixture['dml_dict']['all_thetas']))
+ dml_framework_fixture["dml_framework_obj_concat"].all_thetas,
+ np.vstack((dml_framework_fixture["dml_dict"]["all_thetas"], dml_framework_fixture["dml_dict"]["all_thetas"])),
)
@pytest.mark.ci
def test_dml_framework_se(dml_framework_fixture):
- assert np.allclose(
- dml_framework_fixture['dml_framework_obj'].all_ses,
- dml_framework_fixture['dml_dict']['all_ses']
- )
- scaling = dml_framework_fixture['dml_dict']['var_scaling_factors'].reshape(-1, 1)
+ assert np.allclose(dml_framework_fixture["dml_framework_obj"].all_ses, dml_framework_fixture["dml_dict"]["all_ses"])
+ scaling = dml_framework_fixture["dml_dict"]["var_scaling_factors"].reshape(-1, 1)
add_var = np.mean(
- np.square(dml_framework_fixture['dml_dict']['scaled_psi'] + dml_framework_fixture['dml_dict']['scaled_psi']),
- axis=0)
- assert np.allclose(
- dml_framework_fixture['dml_framework_obj_add_obj'].all_ses,
- np.sqrt(add_var / scaling)
+ np.square(dml_framework_fixture["dml_dict"]["scaled_psi"] + dml_framework_fixture["dml_dict"]["scaled_psi"]), axis=0
)
- scaling = dml_framework_fixture['dml_dict']['var_scaling_factors'].reshape(-1, 1)
+ assert np.allclose(dml_framework_fixture["dml_framework_obj_add_obj"].all_ses, np.sqrt(add_var / scaling))
+ scaling = dml_framework_fixture["dml_dict"]["var_scaling_factors"].reshape(-1, 1)
sub_var = np.mean(
- np.square(dml_framework_fixture['dml_dict']['scaled_psi'] - dml_framework_fixture['dml_dict_2']['scaled_psi']),
- axis=0)
- assert np.allclose(
- dml_framework_fixture['dml_framework_obj_sub_obj'].all_ses,
- np.sqrt(sub_var / scaling)
+ np.square(dml_framework_fixture["dml_dict"]["scaled_psi"] - dml_framework_fixture["dml_dict_2"]["scaled_psi"]), axis=0
)
+ assert np.allclose(dml_framework_fixture["dml_framework_obj_sub_obj"].all_ses, np.sqrt(sub_var / scaling))
assert np.allclose(
- dml_framework_fixture['dml_framework_obj_mul_obj'].all_ses,
- 2*dml_framework_fixture['dml_dict']['all_ses']
+ dml_framework_fixture["dml_framework_obj_mul_obj"].all_ses, 2 * dml_framework_fixture["dml_dict"]["all_ses"]
)
assert np.allclose(
- dml_framework_fixture['dml_framework_obj_concat'].all_ses,
- np.vstack((dml_framework_fixture['dml_dict']['all_ses'], dml_framework_fixture['dml_dict']['all_ses']))
+ dml_framework_fixture["dml_framework_obj_concat"].all_ses,
+ np.vstack((dml_framework_fixture["dml_dict"]["all_ses"], dml_framework_fixture["dml_dict"]["all_ses"])),
)
@pytest.mark.ci
def test_dml_framework_ci(dml_framework_fixture):
- assert isinstance(dml_framework_fixture['ci'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_joint'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_add_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_joint_add_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_sub_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_joint_sub_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_mul_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_joint_mul_obj'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_concat'], pd.DataFrame)
- assert isinstance(dml_framework_fixture['ci_joint_concat'], pd.DataFrame)
-
-
-@pytest.fixture(scope='module')
+ assert isinstance(dml_framework_fixture["ci"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_joint"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_add_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_joint_add_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_sub_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_joint_sub_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_mul_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_joint_mul_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_concat"], pd.DataFrame)
+ assert isinstance(dml_framework_fixture["ci_joint_concat"], pd.DataFrame)
+
+
+@pytest.fixture(scope="module")
def dml_framework_from_doubleml_fixture(n_rep):
dml_data = make_irm_data()
@@ -168,13 +152,13 @@ def dml_framework_from_doubleml_fixture(n_rep):
dml_framework_obj = dml_irm_obj.construct_framework()
ci = dml_framework_obj.confint(joint=False, level=0.95)
- dml_framework_obj.bootstrap(method='normal')
+ dml_framework_obj.bootstrap(method="normal")
ci_joint = dml_framework_obj.confint(joint=True, level=0.95)
# add objects
dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj
ci_add_obj = dml_framework_obj_add_obj.confint(joint=False, level=0.95)
- dml_framework_obj_add_obj.bootstrap(method='normal')
+ dml_framework_obj_add_obj.bootstrap(method="normal")
ci_joint_add_obj = dml_framework_obj_add_obj.confint(joint=True, level=0.95)
    # subtract objects
@@ -185,40 +169,40 @@ def dml_framework_from_doubleml_fixture(n_rep):
dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2
ci_sub_obj = dml_framework_obj_sub_obj.confint(joint=False, level=0.95)
- dml_framework_obj_sub_obj.bootstrap(method='normal')
+ dml_framework_obj_sub_obj.bootstrap(method="normal")
ci_joint_sub_obj = dml_framework_obj_sub_obj.confint(joint=True, level=0.95)
# multiply objects
dml_framework_obj_mul_obj = dml_framework_obj * 2
ci_mul_obj = dml_framework_obj_mul_obj.confint(joint=False, level=0.95)
- dml_framework_obj_mul_obj.bootstrap(method='normal')
+ dml_framework_obj_mul_obj.bootstrap(method="normal")
ci_joint_mul_obj = dml_framework_obj_mul_obj.confint(joint=True, level=0.95)
# concat objects
dml_framework_obj_concat = concat([dml_framework_obj, dml_framework_obj])
ci_concat = dml_framework_obj_concat.confint(joint=False, level=0.95)
- dml_framework_obj_concat.bootstrap(method='normal')
+ dml_framework_obj_concat.bootstrap(method="normal")
ci_joint_concat = dml_framework_obj_concat.confint(joint=True, level=0.95)
result_dict = {
- 'dml_obj': dml_irm_obj,
- 'dml_obj_2': dml_irm_obj_2,
- 'dml_framework_obj': dml_framework_obj,
- 'dml_framework_obj_add_obj': dml_framework_obj_add_obj,
- 'dml_framework_obj_sub_obj': dml_framework_obj_sub_obj,
- 'dml_framework_obj_mul_obj': dml_framework_obj_mul_obj,
- 'dml_framework_obj_concat': dml_framework_obj_concat,
- 'ci': ci,
- 'ci_add_obj': ci_add_obj,
- 'ci_sub_obj': ci_sub_obj,
- 'ci_mul_obj': ci_mul_obj,
- 'ci_concat': ci_concat,
- 'ci_joint': ci_joint,
- 'ci_joint_add_obj': ci_joint_add_obj,
- 'ci_joint_sub_obj': ci_joint_sub_obj,
- 'ci_joint_mul_obj': ci_joint_mul_obj,
- 'ci_joint_concat': ci_joint_concat,
- 'n_rep': n_rep,
+ "dml_obj": dml_irm_obj,
+ "dml_obj_2": dml_irm_obj_2,
+ "dml_framework_obj": dml_framework_obj,
+ "dml_framework_obj_add_obj": dml_framework_obj_add_obj,
+ "dml_framework_obj_sub_obj": dml_framework_obj_sub_obj,
+ "dml_framework_obj_mul_obj": dml_framework_obj_mul_obj,
+ "dml_framework_obj_concat": dml_framework_obj_concat,
+ "ci": ci,
+ "ci_add_obj": ci_add_obj,
+ "ci_sub_obj": ci_sub_obj,
+ "ci_mul_obj": ci_mul_obj,
+ "ci_concat": ci_concat,
+ "ci_joint": ci_joint,
+ "ci_joint_add_obj": ci_joint_add_obj,
+ "ci_joint_sub_obj": ci_joint_sub_obj,
+ "ci_joint_mul_obj": ci_joint_mul_obj,
+ "ci_joint_concat": ci_joint_concat,
+ "n_rep": n_rep,
}
return result_dict
@@ -226,71 +210,73 @@ def dml_framework_from_doubleml_fixture(n_rep):
@pytest.mark.ci
def test_dml_framework_from_doubleml_theta(dml_framework_from_doubleml_fixture):
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj'].all_thetas,
- dml_framework_from_doubleml_fixture['dml_obj'].all_coef
+ dml_framework_from_doubleml_fixture["dml_framework_obj"].all_thetas,
+ dml_framework_from_doubleml_fixture["dml_obj"].all_coef,
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_add_obj'].all_thetas,
- dml_framework_from_doubleml_fixture['dml_obj'].all_coef + dml_framework_from_doubleml_fixture['dml_obj'].all_coef
+ dml_framework_from_doubleml_fixture["dml_framework_obj_add_obj"].all_thetas,
+ dml_framework_from_doubleml_fixture["dml_obj"].all_coef + dml_framework_from_doubleml_fixture["dml_obj"].all_coef,
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_sub_obj'].all_thetas,
- dml_framework_from_doubleml_fixture['dml_obj'].all_coef - dml_framework_from_doubleml_fixture['dml_obj_2'].all_coef
+ dml_framework_from_doubleml_fixture["dml_framework_obj_sub_obj"].all_thetas,
+ dml_framework_from_doubleml_fixture["dml_obj"].all_coef - dml_framework_from_doubleml_fixture["dml_obj_2"].all_coef,
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_mul_obj'].all_thetas,
- 2*dml_framework_from_doubleml_fixture['dml_obj'].all_coef
+ dml_framework_from_doubleml_fixture["dml_framework_obj_mul_obj"].all_thetas,
+ 2 * dml_framework_from_doubleml_fixture["dml_obj"].all_coef,
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_concat'].all_thetas,
- np.vstack((dml_framework_from_doubleml_fixture['dml_obj'].all_coef,
- dml_framework_from_doubleml_fixture['dml_obj'].all_coef))
+ dml_framework_from_doubleml_fixture["dml_framework_obj_concat"].all_thetas,
+ np.vstack(
+ (dml_framework_from_doubleml_fixture["dml_obj"].all_coef, dml_framework_from_doubleml_fixture["dml_obj"].all_coef)
+ ),
)
@pytest.mark.ci
def test_dml_framework_from_doubleml_se(dml_framework_from_doubleml_fixture):
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj'].all_ses,
- dml_framework_from_doubleml_fixture['dml_obj'].all_se
+ dml_framework_from_doubleml_fixture["dml_framework_obj"].all_ses, dml_framework_from_doubleml_fixture["dml_obj"].all_se
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_add_obj'].all_ses,
- 2*dml_framework_from_doubleml_fixture['dml_obj'].all_se
+ dml_framework_from_doubleml_fixture["dml_framework_obj_add_obj"].all_ses,
+ 2 * dml_framework_from_doubleml_fixture["dml_obj"].all_se,
)
- if dml_framework_from_doubleml_fixture['n_rep'] == 1:
+ if dml_framework_from_doubleml_fixture["n_rep"] == 1:
# formula only valid for n_rep = 1
- scaling = np.array([dml_framework_from_doubleml_fixture['dml_obj']._var_scaling_factors]).reshape(-1, 1)
+ scaling = np.array([dml_framework_from_doubleml_fixture["dml_obj"]._var_scaling_factors]).reshape(-1, 1)
sub_var = np.mean(
- np.square(dml_framework_from_doubleml_fixture['dml_obj'].psi
- - dml_framework_from_doubleml_fixture['dml_obj_2'].psi),
- axis=0)
+ np.square(
+ dml_framework_from_doubleml_fixture["dml_obj"].psi - dml_framework_from_doubleml_fixture["dml_obj_2"].psi
+ ),
+ axis=0,
+ )
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_sub_obj'].all_ses,
- np.sqrt(sub_var / scaling)
+ dml_framework_from_doubleml_fixture["dml_framework_obj_sub_obj"].all_ses, np.sqrt(sub_var / scaling)
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_mul_obj'].all_ses,
- 2*dml_framework_from_doubleml_fixture['dml_obj'].all_se
+ dml_framework_from_doubleml_fixture["dml_framework_obj_mul_obj"].all_ses,
+ 2 * dml_framework_from_doubleml_fixture["dml_obj"].all_se,
)
assert np.allclose(
- dml_framework_from_doubleml_fixture['dml_framework_obj_concat'].all_ses,
- np.vstack((dml_framework_from_doubleml_fixture['dml_obj'].all_se,
- dml_framework_from_doubleml_fixture['dml_obj'].all_se))
+ dml_framework_from_doubleml_fixture["dml_framework_obj_concat"].all_ses,
+ np.vstack(
+ (dml_framework_from_doubleml_fixture["dml_obj"].all_se, dml_framework_from_doubleml_fixture["dml_obj"].all_se)
+ ),
)
@pytest.mark.ci
def test_dml_framework_from_doubleml_ci(dml_framework_from_doubleml_fixture):
- assert isinstance(dml_framework_from_doubleml_fixture['ci'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_joint'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_add_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_add_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_sub_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_sub_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_mul_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_mul_obj'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_concat'], pd.DataFrame)
- assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_concat'], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_joint"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_add_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_joint_add_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_sub_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_joint_sub_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_mul_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_joint_mul_obj"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_concat"], pd.DataFrame)
+ assert isinstance(dml_framework_from_doubleml_fixture["ci_joint_concat"], pd.DataFrame)
diff --git a/doubleml/tests/test_framework_coverage.py b/doubleml/tests/test_framework_coverage.py
index 3fe0b6498..03625cef2 100644
--- a/doubleml/tests/test_framework_coverage.py
+++ b/doubleml/tests/test_framework_coverage.py
@@ -1,23 +1,22 @@
-import pytest
import numpy as np
+import pytest
from doubleml.double_ml_framework import DoubleMLFramework, concat
+
from ._utils import generate_dml_dict
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_thetas(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def test_dml_framework_coverage_fixture(n_rep, n_thetas):
np.random.seed(42)
R = 1000
@@ -41,8 +40,8 @@ def test_dml_framework_coverage_fixture(n_rep, n_thetas):
coverage_joint_mul_obj = np.zeros((R, 1))
coverage_all_cis_joint_mul_obj = np.zeros((R, 1, n_rep))
- coverage_concat = np.zeros((R, 2*n_thetas))
- coverage_all_cis_concat = np.zeros((R, 2*n_thetas, n_rep))
+ coverage_concat = np.zeros((R, 2 * n_thetas))
+ coverage_all_cis_concat = np.zeros((R, 2 * n_thetas, n_rep))
coverage_joint_concat = np.zeros((R, 1))
coverage_all_cis_joint_concat = np.zeros((R, 1, n_rep))
for r in range(R):
@@ -62,163 +61,193 @@ def test_dml_framework_coverage_fixture(n_rep, n_thetas):
true_thetas = np.vstack((np.repeat(0.0, n_thetas), np.repeat(-1.0, n_thetas))).transpose()
ci = dml_framework_obj_1.confint(joint=False, level=0.95)
- coverage[r, :] = (true_thetas[:, 0] >= ci['2.5 %'].to_numpy()) & (true_thetas[:, 0] <= ci['97.5 %'].to_numpy())
- coverage_all_cis[r, :, :] = (true_thetas[:, 0].reshape(-1, 1) >= dml_framework_obj_1._all_cis[:, 0, :]) & \
- (true_thetas[:, 0].reshape(-1, 1) <= dml_framework_obj_1._all_cis[:, 1, :])
+ coverage[r, :] = (true_thetas[:, 0] >= ci["2.5 %"].to_numpy()) & (true_thetas[:, 0] <= ci["97.5 %"].to_numpy())
+ coverage_all_cis[r, :, :] = (true_thetas[:, 0].reshape(-1, 1) >= dml_framework_obj_1._all_cis[:, 0, :]) & (
+ true_thetas[:, 0].reshape(-1, 1) <= dml_framework_obj_1._all_cis[:, 1, :]
+ )
# joint confidence interval
- dml_framework_obj_1.bootstrap(method='normal')
+ dml_framework_obj_1.bootstrap(method="normal")
ci_joint = dml_framework_obj_1.confint(joint=True, level=0.95)
coverage_joint[r, :] = np.all(
- (true_thetas[:, 0] >= ci_joint['2.5 %'].to_numpy()) &
- (true_thetas[:, 0] <= ci_joint['97.5 %'].to_numpy()))
+ (true_thetas[:, 0] >= ci_joint["2.5 %"].to_numpy()) & (true_thetas[:, 0] <= ci_joint["97.5 %"].to_numpy())
+ )
coverage_all_cis_joint[r, :, :] = np.all(
- (true_thetas[:, 0].reshape(-1, 1) >= dml_framework_obj_1._all_cis[:, 0, :]) &
- (true_thetas[:, 0].reshape(-1, 1) <= dml_framework_obj_1._all_cis[:, 1, :]),
- axis=0)
+ (true_thetas[:, 0].reshape(-1, 1) >= dml_framework_obj_1._all_cis[:, 0, :])
+ & (true_thetas[:, 0].reshape(-1, 1) <= dml_framework_obj_1._all_cis[:, 1, :]),
+ axis=0,
+ )
# add objects
dml_framework_obj_add_obj = dml_framework_obj_1 + dml_framework_obj_2
true_thetas_add_obj = np.sum(true_thetas, axis=1)
ci_add_obj = dml_framework_obj_add_obj.confint(joint=False, level=0.95)
- coverage_add_obj[r, :] = (
- (true_thetas_add_obj >= ci_add_obj['2.5 %'].to_numpy()) &
- (true_thetas_add_obj <= ci_add_obj['97.5 %'].to_numpy()))
+ coverage_add_obj[r, :] = (true_thetas_add_obj >= ci_add_obj["2.5 %"].to_numpy()) & (
+ true_thetas_add_obj <= ci_add_obj["97.5 %"].to_numpy()
+ )
coverage_all_cis_add_obj[r, :, :] = (
- (true_thetas_add_obj.reshape(-1, 1) >= dml_framework_obj_add_obj._all_cis[:, 0, :]) &
- (true_thetas_add_obj.reshape(-1, 1) <= dml_framework_obj_add_obj._all_cis[:, 1, :]))
+ true_thetas_add_obj.reshape(-1, 1) >= dml_framework_obj_add_obj._all_cis[:, 0, :]
+ ) & (true_thetas_add_obj.reshape(-1, 1) <= dml_framework_obj_add_obj._all_cis[:, 1, :])
- dml_framework_obj_add_obj.bootstrap(method='normal')
+ dml_framework_obj_add_obj.bootstrap(method="normal")
ci_joint_add_obj = dml_framework_obj_add_obj.confint(joint=True, level=0.95)
coverage_joint_add_obj[r, :] = np.all(
- (true_thetas_add_obj >= ci_joint_add_obj['2.5 %'].to_numpy()) &
- (true_thetas_add_obj <= ci_joint_add_obj['97.5 %'].to_numpy()))
+ (true_thetas_add_obj >= ci_joint_add_obj["2.5 %"].to_numpy())
+ & (true_thetas_add_obj <= ci_joint_add_obj["97.5 %"].to_numpy())
+ )
coverage_all_cis_joint_add_obj[r, :, :] = np.all(
- (true_thetas_add_obj.reshape(-1, 1) >= dml_framework_obj_add_obj._all_cis[:, 0, :]) &
- (true_thetas_add_obj.reshape(-1, 1) <= dml_framework_obj_add_obj._all_cis[:, 1, :]),
- axis=0)
+ (true_thetas_add_obj.reshape(-1, 1) >= dml_framework_obj_add_obj._all_cis[:, 0, :])
+ & (true_thetas_add_obj.reshape(-1, 1) <= dml_framework_obj_add_obj._all_cis[:, 1, :]),
+ axis=0,
+ )
    # subtract objects
dml_framework_obj_sub_obj = dml_framework_obj_1 - dml_framework_obj_2
true_thetas_sub_obj = true_thetas[:, 0] - true_thetas[:, 1]
ci_sub_obj = dml_framework_obj_sub_obj.confint(joint=False, level=0.95)
- coverage_sub_obj[r, :] = (
- (true_thetas_sub_obj >= ci_sub_obj['2.5 %'].to_numpy()) &
- (true_thetas_sub_obj <= ci_sub_obj['97.5 %'].to_numpy()))
+ coverage_sub_obj[r, :] = (true_thetas_sub_obj >= ci_sub_obj["2.5 %"].to_numpy()) & (
+ true_thetas_sub_obj <= ci_sub_obj["97.5 %"].to_numpy()
+ )
coverage_all_cis_sub_obj[r, :, :] = (
- (true_thetas_sub_obj.reshape(-1, 1) >= dml_framework_obj_sub_obj._all_cis[:, 0, :]) &
- (true_thetas_sub_obj.reshape(-1, 1) <= dml_framework_obj_sub_obj._all_cis[:, 1, :]))
+ true_thetas_sub_obj.reshape(-1, 1) >= dml_framework_obj_sub_obj._all_cis[:, 0, :]
+ ) & (true_thetas_sub_obj.reshape(-1, 1) <= dml_framework_obj_sub_obj._all_cis[:, 1, :])
- dml_framework_obj_sub_obj.bootstrap(method='normal')
+ dml_framework_obj_sub_obj.bootstrap(method="normal")
ci_joint_sub_obj = dml_framework_obj_sub_obj.confint(joint=True, level=0.95)
coverage_joint_sub_obj[r, :] = np.all(
- (true_thetas_sub_obj >= ci_joint_sub_obj['2.5 %'].to_numpy()) &
- (true_thetas_sub_obj <= ci_joint_sub_obj['97.5 %'].to_numpy()))
+ (true_thetas_sub_obj >= ci_joint_sub_obj["2.5 %"].to_numpy())
+ & (true_thetas_sub_obj <= ci_joint_sub_obj["97.5 %"].to_numpy())
+ )
coverage_all_cis_joint_sub_obj[r, :, :] = np.all(
- (true_thetas_sub_obj.reshape(-1, 1) >= dml_framework_obj_sub_obj._all_cis[:, 0, :]) &
- (true_thetas_sub_obj.reshape(-1, 1) <= dml_framework_obj_sub_obj._all_cis[:, 1, :]),
- axis=0)
+ (true_thetas_sub_obj.reshape(-1, 1) >= dml_framework_obj_sub_obj._all_cis[:, 0, :])
+ & (true_thetas_sub_obj.reshape(-1, 1) <= dml_framework_obj_sub_obj._all_cis[:, 1, :]),
+ axis=0,
+ )
# multiply objects
dml_framework_obj_mul_obj = dml_framework_obj_2 * 2
true_thetas_mul_obj = 2 * true_thetas[:, 1]
ci_mul_obj = dml_framework_obj_mul_obj.confint(joint=False, level=0.95)
- coverage_mul_obj[r, :] = (
- (true_thetas_mul_obj >= ci_mul_obj['2.5 %'].to_numpy()) &
- (true_thetas_mul_obj <= ci_mul_obj['97.5 %'].to_numpy()))
+ coverage_mul_obj[r, :] = (true_thetas_mul_obj >= ci_mul_obj["2.5 %"].to_numpy()) & (
+ true_thetas_mul_obj <= ci_mul_obj["97.5 %"].to_numpy()
+ )
coverage_all_cis_mul_obj[r, :, :] = (
- (true_thetas_mul_obj.reshape(-1, 1) >= dml_framework_obj_mul_obj._all_cis[:, 0, :]) &
- (true_thetas_mul_obj.reshape(-1, 1) <= dml_framework_obj_mul_obj._all_cis[:, 1, :]))
+ true_thetas_mul_obj.reshape(-1, 1) >= dml_framework_obj_mul_obj._all_cis[:, 0, :]
+ ) & (true_thetas_mul_obj.reshape(-1, 1) <= dml_framework_obj_mul_obj._all_cis[:, 1, :])
- dml_framework_obj_mul_obj.bootstrap(method='normal')
+ dml_framework_obj_mul_obj.bootstrap(method="normal")
ci_joint_mul_obj = dml_framework_obj_mul_obj.confint(joint=True, level=0.95)
coverage_joint_mul_obj[r, :] = np.all(
- (true_thetas_mul_obj >= ci_joint_mul_obj['2.5 %'].to_numpy()) &
- (true_thetas_mul_obj <= ci_joint_mul_obj['97.5 %'].to_numpy()))
+ (true_thetas_mul_obj >= ci_joint_mul_obj["2.5 %"].to_numpy())
+ & (true_thetas_mul_obj <= ci_joint_mul_obj["97.5 %"].to_numpy())
+ )
coverage_all_cis_joint_mul_obj[r, :, :] = np.all(
- (true_thetas_mul_obj.reshape(-1, 1) >= dml_framework_obj_mul_obj._all_cis[:, 0, :]) &
- (true_thetas_mul_obj.reshape(-1, 1) <= dml_framework_obj_mul_obj._all_cis[:, 1, :]),
- axis=0)
+ (true_thetas_mul_obj.reshape(-1, 1) >= dml_framework_obj_mul_obj._all_cis[:, 0, :])
+ & (true_thetas_mul_obj.reshape(-1, 1) <= dml_framework_obj_mul_obj._all_cis[:, 1, :]),
+ axis=0,
+ )
# concat objects
dml_framework_obj_concat = concat([dml_framework_obj_1, dml_framework_obj_2])
- true_thetas_concat = true_thetas.reshape(-1, order='F')
+ true_thetas_concat = true_thetas.reshape(-1, order="F")
ci_concat = dml_framework_obj_concat.confint(joint=False, level=0.95)
- coverage_concat[r, :] = (
- (true_thetas_concat >= ci_concat['2.5 %'].to_numpy()) &
- (true_thetas_concat <= ci_concat['97.5 %'].to_numpy()))
+ coverage_concat[r, :] = (true_thetas_concat >= ci_concat["2.5 %"].to_numpy()) & (
+ true_thetas_concat <= ci_concat["97.5 %"].to_numpy()
+ )
coverage_all_cis_concat[r, :, :] = (
- (true_thetas_concat.reshape(-1, 1) >= dml_framework_obj_concat._all_cis[:, 0, :]) &
- (true_thetas_concat.reshape(-1, 1) <= dml_framework_obj_concat._all_cis[:, 1, :]))
+ true_thetas_concat.reshape(-1, 1) >= dml_framework_obj_concat._all_cis[:, 0, :]
+ ) & (true_thetas_concat.reshape(-1, 1) <= dml_framework_obj_concat._all_cis[:, 1, :])
- dml_framework_obj_concat.bootstrap(method='normal')
+ dml_framework_obj_concat.bootstrap(method="normal")
ci_joint_concat = dml_framework_obj_concat.confint(joint=True, level=0.95)
coverage_joint_concat[r, :] = np.all(
- (true_thetas_concat >= ci_joint_concat['2.5 %'].to_numpy()) &
- (true_thetas_concat <= ci_joint_concat['97.5 %'].to_numpy()))
+ (true_thetas_concat >= ci_joint_concat["2.5 %"].to_numpy())
+ & (true_thetas_concat <= ci_joint_concat["97.5 %"].to_numpy())
+ )
coverage_all_cis_joint_concat[r, :, :] = np.all(
- (true_thetas_concat.reshape(-1, 1) >= dml_framework_obj_concat._all_cis[:, 0, :]) &
- (true_thetas_concat.reshape(-1, 1) <= dml_framework_obj_concat._all_cis[:, 1, :]),
- axis=0)
+ (true_thetas_concat.reshape(-1, 1) >= dml_framework_obj_concat._all_cis[:, 0, :])
+ & (true_thetas_concat.reshape(-1, 1) <= dml_framework_obj_concat._all_cis[:, 1, :]),
+ axis=0,
+ )
result_dict = {
- 'coverage_rate': np.mean(coverage, axis=0),
- 'coverage_rate_all_cis': np.mean(coverage_all_cis, axis=0),
- 'coverage_rate_joint': np.mean(coverage_joint, axis=0),
- 'coverage_rate_all_cis_joint': np.mean(coverage_all_cis_joint, axis=0),
- 'coverage_rate_add_obj': np.mean(coverage_add_obj, axis=0),
- 'coverage_rate_all_cis_add_obj': np.mean(coverage_all_cis_add_obj, axis=0),
- 'coverage_rate_joint_add_obj': np.mean(coverage_joint_add_obj, axis=0),
- 'coverage_rate_all_cis_joint_add_obj': np.mean(coverage_all_cis_joint_add_obj, axis=0),
- 'coverage_rate_sub_obj': np.mean(coverage_sub_obj, axis=0),
- 'coverage_rate_all_cis_sub_obj': np.mean(coverage_all_cis_sub_obj, axis=0),
- 'coverage_rate_joint_sub_obj': np.mean(coverage_joint_sub_obj, axis=0),
- 'coverage_rate_all_cis_joint_sub_obj': np.mean(coverage_all_cis_joint_sub_obj, axis=0),
- 'coverage_rate_mul_obj': np.mean(coverage_mul_obj, axis=0),
- 'coverage_rate_all_cis_mul_obj': np.mean(coverage_all_cis_mul_obj, axis=0),
- 'coverage_rate_joint_mul_obj': np.mean(coverage_joint_mul_obj, axis=0),
- 'coverage_rate_all_cis_joint_mul_obj': np.mean(coverage_all_cis_joint_mul_obj, axis=0),
- 'coverage_rate_concat': np.mean(coverage_concat, axis=0),
- 'coverage_rate_all_cis_concat': np.mean(coverage_all_cis_concat, axis=0),
- 'coverage_rate_joint_concat': np.mean(coverage_joint_concat, axis=0),
- 'coverage_rate_all_cis_joint_concat': np.mean(coverage_all_cis_joint_concat, axis=0),
+ "coverage_rate": np.mean(coverage, axis=0),
+ "coverage_rate_all_cis": np.mean(coverage_all_cis, axis=0),
+ "coverage_rate_joint": np.mean(coverage_joint, axis=0),
+ "coverage_rate_all_cis_joint": np.mean(coverage_all_cis_joint, axis=0),
+ "coverage_rate_add_obj": np.mean(coverage_add_obj, axis=0),
+ "coverage_rate_all_cis_add_obj": np.mean(coverage_all_cis_add_obj, axis=0),
+ "coverage_rate_joint_add_obj": np.mean(coverage_joint_add_obj, axis=0),
+ "coverage_rate_all_cis_joint_add_obj": np.mean(coverage_all_cis_joint_add_obj, axis=0),
+ "coverage_rate_sub_obj": np.mean(coverage_sub_obj, axis=0),
+ "coverage_rate_all_cis_sub_obj": np.mean(coverage_all_cis_sub_obj, axis=0),
+ "coverage_rate_joint_sub_obj": np.mean(coverage_joint_sub_obj, axis=0),
+ "coverage_rate_all_cis_joint_sub_obj": np.mean(coverage_all_cis_joint_sub_obj, axis=0),
+ "coverage_rate_mul_obj": np.mean(coverage_mul_obj, axis=0),
+ "coverage_rate_all_cis_mul_obj": np.mean(coverage_all_cis_mul_obj, axis=0),
+ "coverage_rate_joint_mul_obj": np.mean(coverage_joint_mul_obj, axis=0),
+ "coverage_rate_all_cis_joint_mul_obj": np.mean(coverage_all_cis_joint_mul_obj, axis=0),
+ "coverage_rate_concat": np.mean(coverage_concat, axis=0),
+ "coverage_rate_all_cis_concat": np.mean(coverage_all_cis_concat, axis=0),
+ "coverage_rate_joint_concat": np.mean(coverage_joint_concat, axis=0),
+ "coverage_rate_all_cis_joint_concat": np.mean(coverage_all_cis_joint_concat, axis=0),
}
return result_dict
@pytest.mark.ci
def test_dml_framework_coverage(test_dml_framework_coverage_fixture):
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_add_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_add_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_add_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_sub_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_sub_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_sub_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_mul_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_mul_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_mul_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_concat'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_concat'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_concat'] < 1.0))
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_add_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_add_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_add_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_sub_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_sub_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_sub_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_mul_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_mul_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_mul_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_concat"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_concat"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_concat"] < 1.0)
+ )
@pytest.mark.ci
def test_dml_framework_coverage_joint(test_dml_framework_coverage_fixture):
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_joint'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_joint_add_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_add_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_add_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_joint_sub_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_sub_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_sub_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_joint_mul_obj'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_mul_obj'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_mul_obj'] < 1.0))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_joint_concat'] >= 0.9))
- assert np.all((test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_concat'] >= 0.9) &
- (test_dml_framework_coverage_fixture['coverage_rate_all_cis_joint_concat'] < 1.0))
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_joint"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_joint_add_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_add_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_add_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_joint_sub_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_sub_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_sub_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_joint_mul_obj"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_mul_obj"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_mul_obj"] < 1.0)
+ )
+ assert np.all((test_dml_framework_coverage_fixture["coverage_rate_joint_concat"] >= 0.9))
+ assert np.all(
+ (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_concat"] >= 0.9)
+ & (test_dml_framework_coverage_fixture["coverage_rate_all_cis_joint_concat"] < 1.0)
+ )
diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index b80cfac27..b7ee2b8de 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -1,8 +1,10 @@
-import pytest
-import numpy as np
import copy
+import numpy as np
+import pytest
+
from doubleml.double_ml_framework import DoubleMLFramework, concat
+
from ._utils import generate_dml_dict
n_obs = 10
@@ -14,12 +16,12 @@
psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep))
doubleml_dict = generate_dml_dict(psi_a, psi_b)
# add sensitivity elements
-doubleml_dict['sensitivity_elements'] = {
- 'sigma2': np.ones(shape=(1, n_thetas, n_rep)),
- 'nu2': np.ones(shape=(1, n_thetas, n_rep)),
- 'psi_sigma2': np.ones(shape=(n_obs, n_thetas, n_rep)),
- 'psi_nu2': np.ones(shape=(n_obs, n_thetas, n_rep)),
- 'riesz_rep': np.ones(shape=(n_obs, n_thetas, n_rep))
+doubleml_dict["sensitivity_elements"] = {
+ "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+ "nu2": np.ones(shape=(1, n_thetas, n_rep)),
+ "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
+ "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
+ "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
}
# combine objects and estimate parameters
@@ -36,37 +38,37 @@ def test_input_exceptions():
msg = r"The shape of thetas does not match the expected shape \(2,\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['thetas'] = np.ones(shape=(1,))
+ test_dict["thetas"] = np.ones(shape=(1,))
DoubleMLFramework(test_dict)
msg = r"The shape of ses does not match the expected shape \(2,\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['ses'] = np.ones(shape=(1,))
+ test_dict["ses"] = np.ones(shape=(1,))
DoubleMLFramework(test_dict)
msg = r"The shape of all_thetas does not match the expected shape \(2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['all_thetas'] = np.ones(shape=(1, 5))
+ test_dict["all_thetas"] = np.ones(shape=(1, 5))
DoubleMLFramework(test_dict)
msg = r"The shape of all_ses does not match the expected shape \(2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['all_ses'] = np.ones(shape=(1, 5))
+ test_dict["all_ses"] = np.ones(shape=(1, 5))
DoubleMLFramework(test_dict)
msg = r"The shape of var_scaling_factors does not match the expected shape \(2,\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['var_scaling_factors'] = np.ones(shape=(1, 5))
+ test_dict["var_scaling_factors"] = np.ones(shape=(1, 5))
DoubleMLFramework(test_dict)
msg = r"The shape of scaled_psi does not match the expected shape \(10, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['scaled_psi'] = np.ones(shape=(10, 2, 5, 3))
+ test_dict["scaled_psi"] = np.ones(shape=(10, 2, 5, 3))
DoubleMLFramework(test_dict)
msg = "doubleml_dict must be a dictionary."
@@ -76,70 +78,72 @@ def test_input_exceptions():
msg = "sensitivity_elements must be a dictionary."
with pytest.raises(TypeError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['sensitivity_elements'] = 1
+ test_dict["sensitivity_elements"] = 1
DoubleMLFramework(test_dict)
- msg = 'The sensitivity_elements dict must contain the following keys: sigma2, nu2, psi_sigma2, psi_nu2'
+ msg = "The sensitivity_elements dict must contain the following keys: sigma2, nu2, psi_sigma2, psi_nu2"
with pytest.raises(ValueError, match=msg):
test_dict = doubleml_dict.copy()
- test_dict['sensitivity_elements'] = {'sensitivities': np.ones(shape=(n_obs, n_thetas, n_rep))}
+ test_dict["sensitivity_elements"] = {"sensitivities": np.ones(shape=(n_obs, n_thetas, n_rep))}
DoubleMLFramework(test_dict)
- msg = r'The shape of sigma2 does not match the expected shape \(1, 2, 5\)\.'
+ msg = r"The shape of sigma2 does not match the expected shape \(1, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['sensitivity_elements']['sigma2'] = np.ones(shape=(n_obs, n_rep))
+ test_dict["sensitivity_elements"]["sigma2"] = np.ones(shape=(n_obs, n_rep))
DoubleMLFramework(test_dict)
- msg = r'The shape of nu2 does not match the expected shape \(1, 2, 5\)\.'
+ msg = r"The shape of nu2 does not match the expected shape \(1, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['sensitivity_elements']['nu2'] = np.ones(shape=(n_obs, n_rep))
+ test_dict["sensitivity_elements"]["nu2"] = np.ones(shape=(n_obs, n_rep))
DoubleMLFramework(test_dict)
- msg = r'The shape of psi_sigma2 does not match the expected shape \(10, 2, 5\)\.'
+ msg = r"The shape of psi_sigma2 does not match the expected shape \(10, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['sensitivity_elements']['psi_sigma2'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
+ test_dict["sensitivity_elements"]["psi_sigma2"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
DoubleMLFramework(test_dict)
- msg = r'The shape of psi_nu2 does not match the expected shape \(10, 2, 5\)\.'
+ msg = r"The shape of psi_nu2 does not match the expected shape \(10, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['sensitivity_elements']['psi_nu2'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
+ test_dict["sensitivity_elements"]["psi_nu2"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
DoubleMLFramework(test_dict)
- msg = r'The shape of riesz_rep does not match the expected shape \(10, 2, 5\)\.'
+ msg = r"The shape of riesz_rep does not match the expected shape \(10, 2, 5\)\."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['sensitivity_elements']['riesz_rep'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
+ test_dict["sensitivity_elements"]["riesz_rep"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
DoubleMLFramework(test_dict)
msg = "is_cluster_data has to be boolean. 1.0 of type was passed."
with pytest.raises(TypeError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['is_cluster_data'] = 1.0
+ test_dict["is_cluster_data"] = 1.0
DoubleMLFramework(test_dict)
msg = "If is_cluster_data is True, cluster_dict must be provided."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['is_cluster_data'] = True
+ test_dict["is_cluster_data"] = True
DoubleMLFramework(test_dict)
msg = "cluster_dict must be a dictionary."
with pytest.raises(TypeError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['is_cluster_data'] = True
- test_dict['cluster_dict'] = 1.0
+ test_dict["is_cluster_data"] = True
+ test_dict["cluster_dict"] = 1.0
DoubleMLFramework(test_dict)
- msg = ('The cluster_dict must contain the following keys: smpls, smpls_cluster,'
- ' cluster_vars, n_folds_per_cluster. Got: cluster_ids.')
+ msg = (
+ "The cluster_dict must contain the following keys: smpls, smpls_cluster,"
+ " cluster_vars, n_folds_per_cluster. Got: cluster_ids."
+ )
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['is_cluster_data'] = True
- test_dict['cluster_dict'] = {'cluster_ids': np.ones(shape=(n_obs, n_rep))}
+ test_dict["is_cluster_data"] = True
+ test_dict["cluster_dict"] = {"cluster_ids": np.ones(shape=(n_obs, n_rep))}
DoubleMLFramework(test_dict)
test_dict = copy.deepcopy(doubleml_dict)
@@ -148,7 +152,7 @@ def test_input_exceptions():
msg = "treatment_names must be a list. Got 1 of type ."
with pytest.raises(TypeError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['treatment_names'] = 1
+ test_dict["treatment_names"] = 1
DoubleMLFramework(test_dict)
with pytest.raises(TypeError, match=msg):
framework_names.treatment_names = 1
@@ -156,18 +160,18 @@ def test_input_exceptions():
msg = r"treatment_names must be a list of strings. At least one element is not a string: \['test', 1\]."
with pytest.raises(TypeError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['treatment_names'] = ['test', 1]
+ test_dict["treatment_names"] = ["test", 1]
DoubleMLFramework(test_dict)
with pytest.raises(TypeError, match=msg):
- framework_names.treatment_names = ['test', 1]
+ framework_names.treatment_names = ["test", 1]
msg = "The length of treatment_names does not match the number of treatments. Got 2 treatments and 3 treatment names."
with pytest.raises(ValueError, match=msg):
test_dict = copy.deepcopy(doubleml_dict)
- test_dict['treatment_names'] = ['test', 'test2', 'test3']
+ test_dict["treatment_names"] = ["test", "test2", "test3"]
DoubleMLFramework(test_dict)
with pytest.raises(ValueError, match=msg):
- framework_names.treatment_names = ['test', 'test2', 'test3']
+ framework_names.treatment_names = ["test", "test2", "test3"]
def test_operation_exceptions():
@@ -177,21 +181,21 @@ def test_operation_exceptions():
_ = dml_framework_obj_1 + 1.0
with pytest.raises(TypeError, match=msg):
_ = 1.0 + dml_framework_obj_1
- msg = 'The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11.'
+ msg = "The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep))
psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep))
doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2)
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = dml_framework_obj_1 + dml_framework_obj_2
- msg = 'The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3.'
+ msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep))
psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep))
doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2)
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = dml_framework_obj_1 + dml_framework_obj_2
- msg = 'The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6.'
+ msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1))
psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1))
@@ -205,21 +209,21 @@ def test_operation_exceptions():
_ = dml_framework_obj_1 - 1.0
with pytest.raises(TypeError, match=msg):
_ = 1.0 - dml_framework_obj_1
- msg = 'The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11.'
+ msg = "The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep))
psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep))
doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2)
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = dml_framework_obj_1 - dml_framework_obj_2
- msg = 'The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3.'
+ msg = "The number of parameters theta in DoubleMLFrameworks must be the same. Got 2 and 3."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs, n_thetas + 1, n_rep))
psi_b_2 = np.random.normal(size=(n_obs, n_thetas + 1, n_rep))
doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2)
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = dml_framework_obj_1 - dml_framework_obj_2
- msg = 'The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6.'
+ msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1))
psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1))
@@ -235,20 +239,20 @@ def test_operation_exceptions():
_ = {} * dml_framework_obj_1
# concatenation
- msg = 'Need at least one object to concatenate.'
+ msg = "Need at least one object to concatenate."
with pytest.raises(TypeError, match=msg):
concat([])
- msg = 'All objects must be of type DoubleMLFramework.'
+ msg = "All objects must be of type DoubleMLFramework."
with pytest.raises(TypeError, match=msg):
concat([dml_framework_obj_1, 1.0])
- msg = 'The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11.'
+ msg = "The number of observations in DoubleMLFrameworks must be the same. Got 10 and 11."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs + 1, n_thetas, n_rep))
psi_b_2 = np.random.normal(size=(n_obs + 1, n_thetas, n_rep))
doubleml_dict_2 = generate_dml_dict(psi_a_2, psi_b_2)
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = concat([dml_framework_obj_1, dml_framework_obj_2])
- msg = 'The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6.'
+ msg = "The number of replications in DoubleMLFrameworks must be the same. Got 5 and 6."
with pytest.raises(ValueError, match=msg):
psi_a_2 = np.ones(shape=(n_obs, n_thetas, n_rep + 1))
psi_b_2 = np.random.normal(size=(n_obs, n_thetas, n_rep + 1))
@@ -256,15 +260,15 @@ def test_operation_exceptions():
dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2)
_ = concat([dml_framework_obj_1, dml_framework_obj_2])
- msg = 'concat not yet implemented with clustering.'
+ msg = "concat not yet implemented with clustering."
with pytest.raises(NotImplementedError, match=msg):
doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2)
- doubleml_dict_cluster['is_cluster_data'] = True
- doubleml_dict_cluster['cluster_dict'] = {
- 'smpls': np.ones(shape=(n_obs, n_rep)),
- 'smpls_cluster': np.ones(shape=(n_obs, n_rep)),
- 'cluster_vars': np.ones(shape=(n_obs, n_rep)),
- 'n_folds_per_cluster': 2
+ doubleml_dict_cluster["is_cluster_data"] = True
+ doubleml_dict_cluster["cluster_dict"] = {
+ "smpls": np.ones(shape=(n_obs, n_rep)),
+ "smpls_cluster": np.ones(shape=(n_obs, n_rep)),
+ "cluster_vars": np.ones(shape=(n_obs, n_rep)),
+ "n_folds_per_cluster": 2,
}
dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster)
_ = concat([dml_framework_obj_cluster, dml_framework_obj_cluster])
@@ -283,13 +287,13 @@ def test_p_adjust_exceptions():
msg = r'Apply bootstrap\(\) before p_adjust\("rw"\)\.'
with pytest.raises(ValueError, match=msg):
- _ = dml_framework_obj_1.p_adjust(method='rw')
+ _ = dml_framework_obj_1.p_adjust(method="rw")
@pytest.mark.ci
def test_sensitivity_exceptions():
dml_framework_no_sensitivity = DoubleMLFramework(generate_dml_dict(psi_a, psi_b))
- msg = 'Sensitivity analysis is not implemented for this model.'
+ msg = "Sensitivity analysis is not implemented for this model."
with pytest.raises(NotImplementedError, match=msg):
_ = dml_framework_no_sensitivity._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.0, level=0.95)
@@ -300,7 +304,7 @@ def test_sensitivity_exceptions():
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=1, cf_d=0.03, rho=1.0, level=0.95)
- msg = r'cf_y must be in \[0,1\). 1.0 was passed.'
+ msg = r"cf_y must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1.sensitivity_analysis(cf_y=1.0)
with pytest.raises(ValueError, match=msg):
@@ -313,7 +317,7 @@ def test_sensitivity_exceptions():
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=1, rho=1.0, level=0.95)
- msg = r'cf_d must be in \[0,1\). 1.0 was passed.'
+ msg = r"cf_d must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=1.0)
with pytest.raises(ValueError, match=msg):
@@ -336,7 +340,7 @@ def test_sensitivity_exceptions():
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_1._calc_robustness_value(rho="1", null_hypothesis=0.0, level=0.95, idx_treatment=0)
- msg = r'The absolute value of rho must be in \[0,1\]. 1.1 was passed.'
+ msg = r"The absolute value of rho must be in \[0,1\]. 1.1 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1)
with pytest.raises(ValueError, match=msg):
@@ -353,7 +357,7 @@ def test_sensitivity_exceptions():
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=1, null_hypothesis=0.0, idx_treatment=0)
- msg = r'The confidence level must be in \(0,1\). 1.0 was passed.'
+ msg = r"The confidence level must be in \(0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0)
with pytest.raises(ValueError, match=msg):
@@ -361,7 +365,7 @@ def test_sensitivity_exceptions():
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=1.0, null_hypothesis=0.0, idx_treatment=0)
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0)
with pytest.raises(ValueError, match=msg):
@@ -384,12 +388,12 @@ def test_sensitivity_exceptions():
_ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0)
sensitivity_dict = generate_dml_dict(psi_a, psi_b)
- sensitivity_dict['sensitivity_elements'] = {
- 'sigma2': np.ones(shape=(1, n_thetas, n_rep)),
- 'nu2': -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
- 'psi_sigma2': np.ones(shape=(n_obs, n_thetas, n_rep)),
- 'psi_nu2': np.ones(shape=(n_obs, n_thetas, n_rep)),
- 'riesz_rep': np.ones(shape=(n_obs, n_thetas, n_rep))
+ sensitivity_dict["sensitivity_elements"] = {
+ "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+ "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
+ "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
+ "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
+ "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
}
dml_framework_sensitivity = DoubleMLFramework(sensitivity_dict)
@@ -409,10 +413,10 @@ def test_sensitivity_exceptions():
# test variances
msg = (
- r'sensitivity_elements sigma2 and nu2 have to be positive\. '
- r'Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] '
- r'and nu2 \[\[\[-1\. -1\. -1\. -1\. -1\.\]\n\s+\[-1\. -1\. -1\. -1\. -1\.\]\]\]\. '
- r'Most likely this is due to low quality learners \(especially propensity scores\)\.'
+ r"sensitivity_elements sigma2 and nu2 have to be positive\. "
+ r"Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] "
+ r"and nu2 \[\[\[-1\. -1\. -1\. -1\. -1\.\]\n\s+\[-1\. -1\. -1\. -1\. -1\.\]\]\]\. "
+ r"Most likely this is due to low quality learners \(especially propensity scores\)\."
)
with pytest.raises(ValueError, match=msg):
_ = dml_framework_sensitivity._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
@@ -424,7 +428,7 @@ def test_sensitivity_exceptions():
def test_framework_sensitivity_plot_input():
dml_framework_obj_plot = DoubleMLFramework(doubleml_dict)
- msg = (r'Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. ')
+ msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. "
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot()
@@ -442,30 +446,30 @@ def test_framework_sensitivity_plot_input():
_ = dml_framework_obj_plot.sensitivity_plot(benchmarks="True")
msg = r"benchmarks has to be a dictionary with keys cf_y, cf_d and name. Got dict_keys\(\['cf_y', 'cf_d'\]\)."
with pytest.raises(ValueError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': 0.1, 'cf_d': 0.15})
+ _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={"cf_y": 0.1, "cf_d": 0.15})
msg = r"benchmarks has to be a dictionary with values of same length. Got \[1, 2, 2\]."
with pytest.raises(ValueError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1], 'cf_d': [0.15, 0.2],
- 'name': ['test', 'test2']})
+ _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={"cf_y": [0.1], "cf_d": [0.15, 0.2], "name": ["test", "test2"]})
msg = "benchmarks cf_y must be of float type. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 2], 'cf_d': [0.15, 0.2],
- 'name': ['test', 'test2']})
- msg = r'benchmarks cf_y must be in \[0,1\). 1.0 was passed.'
+ _ = dml_framework_obj_plot.sensitivity_plot(
+ benchmarks={"cf_y": [0.1, 2], "cf_d": [0.15, 0.2], "name": ["test", "test2"]}
+ )
+ msg = r"benchmarks cf_y must be in \[0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 1.0], 'cf_d': [0.15, 0.2],
- 'name': ['test', 'test2']})
+ _ = dml_framework_obj_plot.sensitivity_plot(
+ benchmarks={"cf_y": [0.1, 1.0], "cf_d": [0.15, 0.2], "name": ["test", "test2"]}
+ )
msg = "benchmarks name must be of string type. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2],
- 'name': [2, 2]})
+ _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={"cf_y": [0.1, 0.2], "cf_d": [0.15, 0.2], "name": [2, 2]})
msg = "value must be a string. 2 of type was passed."
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot(value=2)
msg = "Invalid value test. Valid values theta or ci."
with pytest.raises(ValueError, match=msg):
- _ = dml_framework_obj_plot.sensitivity_plot(value='test')
+ _ = dml_framework_obj_plot.sensitivity_plot(value="test")
msg = "fill has to be boolean. True of type was passed."
with pytest.raises(TypeError, match=msg):
@@ -483,12 +487,12 @@ def test_framework_sensitivity_plot_input():
_ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 1))
with pytest.raises(TypeError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(1, 0.15))
- msg = r'grid_bounds must be in \(0,1\). 1.0 was passed.'
+ msg = r"grid_bounds must be in \(0,1\). 1.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(1.0, 0.15))
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 1.0))
- msg = r'grid_bounds must be in \(0,1\). 0.0 was passed.'
+ msg = r"grid_bounds must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
_ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.0, 0.15))
with pytest.raises(ValueError, match=msg):
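
Aside on the escaping used throughout these expected messages: `pytest.raises(..., match=msg)` applies the pattern with `re.search` against the string of the raised exception, so `msg` is a regular expression and literal parentheses, brackets, and dots must be escaped — hence the raw strings with `\(`, `\[`, and `\.` above. A minimal self-contained sketch of the pattern (the `validate_level` helper is hypothetical, not part of this diff):

```python
import pytest


def validate_level(level):
    # hypothetical validator mirroring the message style tested above
    if not 0.0 < level < 1.0:
        raise ValueError(f"The confidence level must be in (0,1). {level} was passed.")


def test_level_message():
    # raw string: the parentheses are regex metacharacters and must be escaped
    msg = r"The confidence level must be in \(0,1\). 1.0 was passed."
    with pytest.raises(ValueError, match=msg):
        validate_level(1.0)
```
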
diff --git a/doubleml/tests/test_framework_pval_corrections.py b/doubleml/tests/test_framework_pval_corrections.py
index fe311f3c2..b69db44fe 100644
--- a/doubleml/tests/test_framework_pval_corrections.py
+++ b/doubleml/tests/test_framework_pval_corrections.py
@@ -1,30 +1,27 @@
-import pytest
-
import numpy as np
+import pytest
from doubleml.double_ml_framework import DoubleMLFramework
+
from ._utils import generate_dml_dict
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_thetas(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.05, 0.1, 0.2])
+@pytest.fixture(scope="module", params=[0.05, 0.1, 0.2])
def sig_level(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_framework_tstat_pval_fixture(n_rep, n_thetas):
n_obs = 100
@@ -35,7 +32,7 @@ def dml_framework_tstat_pval_fixture(n_rep, n_thetas):
dml_framework_obj = DoubleMLFramework(doubleml_dict)
result_dict = {
- 'dml_framework_obj': dml_framework_obj,
+ "dml_framework_obj": dml_framework_obj,
}
return result_dict
@@ -43,10 +40,10 @@ def dml_framework_tstat_pval_fixture(n_rep, n_thetas):
@pytest.mark.ci
def test_dml_framework_tstat_shape(dml_framework_tstat_pval_fixture):
- dml_framework_obj = dml_framework_tstat_pval_fixture['dml_framework_obj']
+ dml_framework_obj = dml_framework_tstat_pval_fixture["dml_framework_obj"]
t_stats = dml_framework_obj.t_stats
- assert dml_framework_obj.t_stats.shape == (dml_framework_obj.n_thetas, )
+ assert dml_framework_obj.t_stats.shape == (dml_framework_obj.n_thetas,)
assert np.all(np.isfinite(t_stats))
all_t_stats = dml_framework_obj.all_t_stats
@@ -56,10 +53,10 @@ def test_dml_framework_tstat_shape(dml_framework_tstat_pval_fixture):
@pytest.mark.ci
def test_dml_framework_pval_shape(dml_framework_tstat_pval_fixture):
- dml_framework_obj = dml_framework_tstat_pval_fixture['dml_framework_obj']
+ dml_framework_obj = dml_framework_tstat_pval_fixture["dml_framework_obj"]
p_vals = dml_framework_obj.pvals
- assert p_vals.shape == (dml_framework_obj.n_thetas, )
+ assert p_vals.shape == (dml_framework_obj.n_thetas,)
assert np.all(np.isfinite(p_vals))
all_p_vals = dml_framework_obj.all_pvals
@@ -67,7 +64,7 @@ def test_dml_framework_pval_shape(dml_framework_tstat_pval_fixture):
assert np.all(np.isfinite(all_p_vals))
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_framework_pval_cov_fixture(n_rep, sig_level):
np.random.seed(42)
n_thetas = 10
@@ -95,25 +92,25 @@ def dml_framework_pval_cov_fixture(n_rep, sig_level):
# p_value corrections
# bonferroni
- p_vals_bonf, _ = dml_framework_obj.p_adjust(method='bonferroni')
- type1_error_bonf[i] = any(p_vals_bonf['pval'] < sig_level)
+ p_vals_bonf, _ = dml_framework_obj.p_adjust(method="bonferroni")
+ type1_error_bonf[i] = any(p_vals_bonf["pval"] < sig_level)
# holm
- p_vals_holm, _ = dml_framework_obj.p_adjust(method='holm')
- type1_error_holm[i] = any(p_vals_holm['pval'] < sig_level)
+ p_vals_holm, _ = dml_framework_obj.p_adjust(method="holm")
+ type1_error_holm[i] = any(p_vals_holm["pval"] < sig_level)
# romano-wolf
dml_framework_obj.bootstrap(n_rep_boot=1000)
- p_vals_rw, _ = dml_framework_obj.p_adjust(method='romano-wolf')
- type1_error_rw[i] = any(p_vals_rw['pval'] < sig_level)
+ p_vals_rw, _ = dml_framework_obj.p_adjust(method="romano-wolf")
+ type1_error_rw[i] = any(p_vals_rw["pval"] < sig_level)
result_dict = {
- 'sig_level': sig_level,
- 'avg_type1_error_single_estimate': np.mean(avg_type1_error_single_estimate),
- 'avg_type1_error_all_single_estimate': np.mean(avg_type1_error_all_single_estimate),
- 'FWER_bonf': np.mean(type1_error_bonf),
- 'FWER_holm': np.mean(type1_error_holm),
- 'FWER_rw': np.mean(type1_error_rw),
+ "sig_level": sig_level,
+ "avg_type1_error_single_estimate": np.mean(avg_type1_error_single_estimate),
+ "avg_type1_error_all_single_estimate": np.mean(avg_type1_error_all_single_estimate),
+ "FWER_bonf": np.mean(type1_error_bonf),
+ "FWER_holm": np.mean(type1_error_holm),
+ "FWER_rw": np.mean(type1_error_rw),
}
return result_dict
@@ -121,22 +118,23 @@ def dml_framework_pval_cov_fixture(n_rep, sig_level):
@pytest.mark.ci
def test_dml_framework_pval_FWER(dml_framework_pval_cov_fixture):
- sig_level = dml_framework_pval_cov_fixture['sig_level']
- avg_type1_error_single_estimate = dml_framework_pval_cov_fixture['avg_type1_error_single_estimate']
- avg_type1_error_all_single_estimate = dml_framework_pval_cov_fixture['avg_type1_error_all_single_estimate']
+ sig_level = dml_framework_pval_cov_fixture["sig_level"]
+ avg_type1_error_single_estimate = dml_framework_pval_cov_fixture["avg_type1_error_single_estimate"]
+ avg_type1_error_all_single_estimate = dml_framework_pval_cov_fixture["avg_type1_error_all_single_estimate"]
tolerance = 0.02
# only one-sided since median aggregation over independent data
assert avg_type1_error_single_estimate <= sig_level + tolerance
- assert (sig_level - tolerance <= avg_type1_error_all_single_estimate) & \
- (avg_type1_error_all_single_estimate <= sig_level + tolerance)
+ assert (sig_level - tolerance <= avg_type1_error_all_single_estimate) & (
+ avg_type1_error_all_single_estimate <= sig_level + tolerance
+ )
# test FWER control
- FWER_bonf = dml_framework_pval_cov_fixture['FWER_bonf']
+ FWER_bonf = dml_framework_pval_cov_fixture["FWER_bonf"]
assert FWER_bonf <= sig_level + tolerance
- FWER_holm = dml_framework_pval_cov_fixture['FWER_holm']
+ FWER_holm = dml_framework_pval_cov_fixture["FWER_holm"]
assert FWER_holm <= sig_level + tolerance
- FWER_rw = dml_framework_pval_cov_fixture['FWER_rw']
+ FWER_rw = dml_framework_pval_cov_fixture["FWER_rw"]
assert FWER_rw <= sig_level + tolerance
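
For context, the fixture above estimates the familywise error rate by simulation: all null hypotheses are true, so any rejection is a type-1 error, and Bonferroni/Holm guarantee FWER at or below the significance level. A minimal sketch of that logic under the same tolerance, assuming standard-normal t-statistics (synthetic data, not the fixture's):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)
n_sim, n_thetas, sig_level, tolerance = 2000, 10, 0.05, 0.02

any_false_rejection = np.empty(n_sim, dtype=bool)
for i in range(n_sim):
    t_stats = rng.standard_normal(n_thetas)  # all null hypotheses true
    p_vals = 2 * stats.norm.sf(np.abs(t_stats))  # two-sided p-values
    # Bonferroni: reject only if the adjusted p-value min(1, m * p) < alpha
    any_false_rejection[i] = np.any(np.minimum(1.0, n_thetas * p_vals) < sig_level)

fwer = any_false_rejection.mean()
assert fwer <= sig_level + tolerance  # same one-sided check as the test above
```
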
diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py
index 044d89d22..209b72c14 100644
--- a/doubleml/tests/test_framework_sensitivity.py
+++ b/doubleml/tests/test_framework_sensitivity.py
@@ -1,18 +1,16 @@
import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
-from doubleml.irm.irm import DoubleMLIRM
from doubleml.double_ml_framework import concat
-
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from doubleml.irm.irm import DoubleMLIRM
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_framework_sensitivity_fixture(n_rep, generate_data_simple):
dml_data, dml_data_2 = generate_data_simple
@@ -40,67 +38,69 @@ def dml_framework_sensitivity_fixture(n_rep, generate_data_simple):
dml_framework_obj_concat = concat([dml_framework_obj, dml_framework_obj])
result_dict = {
- 'dml_obj': dml_irm_obj,
- 'dml_obj_2': dml_irm_obj_2,
- 'dml_framework_obj': dml_framework_obj,
- 'dml_framework_obj_2': dml_framework_obj_2,
- 'dml_framework_obj_add_obj': dml_framework_obj_add_obj,
- 'dml_framework_obj_sub_obj': dml_framework_obj_sub_obj,
- 'dml_framework_obj_mul_obj': dml_framework_obj_mul_obj,
- 'dml_framework_obj_concat': dml_framework_obj_concat,
- 'n_rep': n_rep,
+ "dml_obj": dml_irm_obj,
+ "dml_obj_2": dml_irm_obj_2,
+ "dml_framework_obj": dml_framework_obj,
+ "dml_framework_obj_2": dml_framework_obj_2,
+ "dml_framework_obj_add_obj": dml_framework_obj_add_obj,
+ "dml_framework_obj_sub_obj": dml_framework_obj_sub_obj,
+ "dml_framework_obj_mul_obj": dml_framework_obj_mul_obj,
+ "dml_framework_obj_concat": dml_framework_obj_concat,
+ "n_rep": n_rep,
}
return result_dict
@pytest.mark.ci
def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture):
- n_rep = dml_framework_sensitivity_fixture['dml_framework_obj'].n_rep
- n_thetas = dml_framework_sensitivity_fixture['dml_framework_obj'].n_thetas
- n_obs = dml_framework_sensitivity_fixture['dml_framework_obj'].n_obs
-
- object_list = ['dml_framework_obj',
- 'dml_framework_obj_2',
- 'dml_framework_obj_add_obj',
- 'dml_framework_obj_sub_obj',
- 'dml_framework_obj_mul_obj']
- var_keys = ['sigma2', 'nu2']
- score_keys = ['psi_sigma2', 'psi_nu2', 'riesz_rep']
+ n_rep = dml_framework_sensitivity_fixture["dml_framework_obj"].n_rep
+ n_thetas = dml_framework_sensitivity_fixture["dml_framework_obj"].n_thetas
+ n_obs = dml_framework_sensitivity_fixture["dml_framework_obj"].n_obs
+
+ object_list = [
+ "dml_framework_obj",
+ "dml_framework_obj_2",
+ "dml_framework_obj_add_obj",
+ "dml_framework_obj_sub_obj",
+ "dml_framework_obj_mul_obj",
+ ]
+ var_keys = ["sigma2", "nu2"]
+ score_keys = ["psi_sigma2", "psi_nu2", "riesz_rep"]
for obj in object_list:
assert dml_framework_sensitivity_fixture[obj]._sensitivity_implemented
for key in var_keys:
- assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == \
- (1, n_thetas, n_rep)
+ assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (1, n_thetas, n_rep)
for key in score_keys:
- assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == \
- (n_obs, n_thetas, n_rep)
+ assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == (n_obs, n_thetas, n_rep)
# separate test for concat
for key in var_keys:
- assert dml_framework_sensitivity_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \
- (1, 2, n_rep)
+ assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == (1, 2, n_rep)
for key in score_keys:
- assert dml_framework_sensitivity_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \
- (n_obs, 2, n_rep)
+ assert dml_framework_sensitivity_fixture["dml_framework_obj_concat"]._sensitivity_elements[key].shape == (
+ n_obs,
+ 2,
+ n_rep,
+ )
@pytest.mark.ci
def test_dml_framework_sensitivity_summary(dml_framework_sensitivity_fixture):
# summary without sensitivity analysis
- sensitivity_summary = dml_framework_sensitivity_fixture['dml_framework_obj_2'].sensitivity_summary
- substring = 'Apply sensitivity_analysis() to generate sensitivity_summary.'
+ sensitivity_summary = dml_framework_sensitivity_fixture["dml_framework_obj_2"].sensitivity_summary
+ substring = "Apply sensitivity_analysis() to generate sensitivity_summary."
assert substring in sensitivity_summary
# summary with sensitivity analysis
- sensitivity_summary = dml_framework_sensitivity_fixture['dml_framework_obj'].sensitivity_summary
+ sensitivity_summary = dml_framework_sensitivity_fixture["dml_framework_obj"].sensitivity_summary
assert isinstance(sensitivity_summary, str)
substrings = [
- '\n------------------ Scenario ------------------\n',
- '\n------------------ Bounds with CI ------------------\n',
- '\n------------------ Robustness Values ------------------\n',
- 'Significance Level: level=',
- 'Sensitivity parameters: cf_y='
+ "\n------------------ Scenario ------------------\n",
+ "\n------------------ Bounds with CI ------------------\n",
+ "\n------------------ Robustness Values ------------------\n",
+ "Significance Level: level=",
+ "Sensitivity parameters: cf_y=",
]
for substring in substrings:
assert substring in sensitivity_summary
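
The shape checks exercised above follow one convention: variance-type elements (`sigma2`, `nu2`) carry a leading singleton axis, while score-type elements (`psi_sigma2`, `psi_nu2`, `riesz_rep`) carry the observation axis. A synthetic dict that would satisfy those assertions (placeholder values, not meaningful estimates):

```python
import numpy as np

n_obs, n_thetas, n_rep = 100, 2, 3
sensitivity_elements = {
    # variance-type elements: one entry per (theta, repetition)
    "sigma2": np.ones((1, n_thetas, n_rep)),
    "nu2": np.ones((1, n_thetas, n_rep)),
    # score-type elements: one entry per (observation, theta, repetition)
    "psi_sigma2": np.ones((n_obs, n_thetas, n_rep)),
    "psi_nu2": np.ones((n_obs, n_thetas, n_rep)),
    "riesz_rep": np.ones((n_obs, n_thetas, n_rep)),
}
```
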
diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py
index 8d7234d62..401827b19 100644
--- a/doubleml/tests/test_model_defaults.py
+++ b/doubleml/tests/test_model_defaults.py
@@ -1,12 +1,17 @@
-import pytest
import numpy as np
+import pytest
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import Lasso, LogisticRegression
import doubleml as dml
-from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, make_did_SZ2020, \
- make_ssm_data
-
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from doubleml.datasets import (
+ make_did_SZ2020,
+ make_iivm_data,
+ make_irm_data,
+ make_pliv_CHS2015,
+ make_plr_CCDDHNR2018,
+ make_ssm_data,
+)
np.random.seed(3141)
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
@@ -58,14 +63,14 @@ def _assert_resampling_default_settings(dml_obj):
assert dml_obj.models is None
# bootstrap method
- assert dml_obj.boot_method == 'normal'
+ assert dml_obj.boot_method == "normal"
assert dml_obj.n_rep_boot == 500
# confint method
assert dml_obj.confint().equals(dml_obj.confint(joint=False, level=0.95))
# p_adjust method
- assert dml_obj.p_adjust().equals(dml_obj.p_adjust(method='romano-wolf'))
+ assert dml_obj.p_adjust().equals(dml_obj.p_adjust(method="romano-wolf"))
@pytest.mark.ci
@@ -73,7 +78,7 @@ def test_plr_defaults():
_assert_is_none(dml_plr)
_fit_bootstrap(dml_plr)
_assert_resampling_default_settings(dml_plr)
- assert dml_plr.score == 'partialling out'
+ assert dml_plr.score == "partialling out"
@pytest.mark.ci
@@ -81,7 +86,7 @@ def test_pliv_defaults():
_assert_is_none(dml_pliv)
_fit_bootstrap(dml_pliv)
_assert_resampling_default_settings(dml_pliv)
- assert dml_pliv.score == 'partialling out'
+ assert dml_pliv.score == "partialling out"
assert dml_pliv.partialX
assert not dml_pliv.partialZ
@@ -91,12 +96,12 @@ def test_irm_defaults():
_assert_is_none(dml_irm)
_fit_bootstrap(dml_irm)
_assert_resampling_default_settings(dml_irm)
- assert dml_irm.score == 'ATE'
- assert dml_irm.trimming_rule == 'truncate'
+ assert dml_irm.score == "ATE"
+ assert dml_irm.trimming_rule == "truncate"
assert dml_irm.trimming_threshold == 1e-2
assert not dml_irm.normalize_ipw
- assert set(dml_irm.weights.keys()) == set(['weights'])
- assert np.array_equal(dml_irm.weights['weights'], np.ones((dml_irm._dml_data.n_obs,)))
+ assert set(dml_irm.weights.keys()) == set(["weights"])
+ assert np.array_equal(dml_irm.weights["weights"], np.ones((dml_irm._dml_data.n_obs,)))
@pytest.mark.ci
@@ -104,9 +109,9 @@ def test_iivm_defaults():
_assert_is_none(dml_iivm)
_fit_bootstrap(dml_iivm)
_assert_resampling_default_settings(dml_iivm)
- assert dml_iivm.score == 'LATE'
- assert dml_iivm.subgroups == {'always_takers': True, 'never_takers': True}
- assert dml_iivm.trimming_rule == 'truncate'
+ assert dml_iivm.score == "LATE"
+ assert dml_iivm.subgroups == {"always_takers": True, "never_takers": True}
+ assert dml_iivm.trimming_rule == "truncate"
assert dml_iivm.trimming_threshold == 1e-2
assert not dml_iivm.normalize_ipw
@@ -118,8 +123,8 @@ def test_cvar_defaults():
_assert_resampling_default_settings(dml_cvar)
assert dml_cvar.quantile == 0.5
assert dml_cvar.treatment == 1
- assert dml_cvar.score == 'CVaR'
- assert dml_cvar.trimming_rule == 'truncate'
+ assert dml_cvar.score == "CVaR"
+ assert dml_cvar.trimming_rule == "truncate"
assert dml_cvar.trimming_threshold == 1e-2
@@ -130,8 +135,8 @@ def test_pq_defaults():
_assert_resampling_default_settings(dml_pq)
assert dml_pq.quantile == 0.5
assert dml_pq.treatment == 1
- assert dml_pq.score == 'PQ'
- assert dml_pq.trimming_rule == 'truncate'
+ assert dml_pq.score == "PQ"
+ assert dml_pq.trimming_rule == "truncate"
assert dml_pq.trimming_threshold == 1e-2
assert dml_pq.normalize_ipw
@@ -143,8 +148,8 @@ def test_lpq_defaults():
_assert_resampling_default_settings(dml_lpq)
assert dml_lpq.quantile == 0.5
assert dml_lpq.treatment == 1
- assert dml_lpq.score == 'LPQ'
- assert dml_lpq.trimming_rule == 'truncate'
+ assert dml_lpq.score == "LPQ"
+ assert dml_lpq.trimming_rule == "truncate"
assert dml_lpq.trimming_threshold == 1e-2
assert dml_lpq.normalize_ipw
@@ -158,8 +163,8 @@ def test_qte_defaults():
_fit_bootstrap(dml_qte)
# not fixed since it is a different object; _assert_resampling_default_settings(dml_qte) to be added in future versions
assert dml_qte.quantiles == 0.5
- assert dml_qte.score == 'PQ'
- assert dml_qte.trimming_rule == 'truncate'
+ assert dml_qte.score == "PQ"
+ assert dml_qte.trimming_rule == "truncate"
assert dml_qte.trimming_threshold == 1e-2
assert dml_qte.normalize_ipw
@@ -169,9 +174,9 @@ def test_did_defaults():
_assert_is_none(dml_did)
_fit_bootstrap(dml_did)
_assert_resampling_default_settings(dml_did)
- assert dml_did.score == 'observational'
+ assert dml_did.score == "observational"
assert dml_did.in_sample_normalization
- assert dml_did.trimming_rule == 'truncate'
+ assert dml_did.trimming_rule == "truncate"
assert dml_did.trimming_threshold == 1e-2
@@ -180,9 +185,9 @@ def test_did_cs_defaults():
_assert_is_none(dml_did_cs)
_fit_bootstrap(dml_did_cs)
_assert_resampling_default_settings(dml_did_cs)
- assert dml_did.score == 'observational'
+ assert dml_did_cs.score == "observational"
assert dml_did_cs.in_sample_normalization
- assert dml_did_cs.trimming_rule == 'truncate'
+ assert dml_did_cs.trimming_rule == "truncate"
assert dml_did_cs.trimming_threshold == 1e-2
@@ -191,8 +196,8 @@ def test_ssm_defaults():
_assert_is_none(dml_ssm)
_fit_bootstrap(dml_ssm)
_assert_resampling_default_settings(dml_ssm)
- assert dml_ssm.score == 'missing-at-random'
- assert dml_ssm.trimming_rule == 'truncate'
+ assert dml_ssm.score == "missing-at-random"
+ assert dml_ssm.trimming_rule == "truncate"
assert dml_ssm.trimming_threshold == 1e-2
assert not dml_ssm.normalize_ipw
@@ -202,12 +207,12 @@ def test_apo_defaults():
_assert_is_none(dml_apo)
_fit_bootstrap(dml_apo)
_assert_resampling_default_settings(dml_apo)
- assert dml_apo.score == 'APO'
- assert dml_apo.trimming_rule == 'truncate'
+ assert dml_apo.score == "APO"
+ assert dml_apo.trimming_rule == "truncate"
assert dml_apo.trimming_threshold == 1e-2
assert not dml_apo.normalize_ipw
- assert set(dml_apo.weights.keys()) == set(['weights'])
- assert np.array_equal(dml_apo.weights['weights'], np.ones((dml_apo._dml_data.n_obs,)))
+ assert set(dml_apo.weights.keys()) == set(["weights"])
+ assert np.array_equal(dml_apo.weights["weights"], np.ones((dml_apo._dml_data.n_obs,)))
@pytest.mark.ci
@@ -217,8 +222,8 @@ def test_apos_defaults():
assert dml_apos.framework is None
assert dml_apos.boot_t_stat is None
_fit_bootstrap(dml_apos)
- assert dml_apos.score == 'APO'
- assert dml_apos.trimming_rule == 'truncate'
+ assert dml_apos.score == "APO"
+ assert dml_apos.trimming_rule == "truncate"
assert dml_apos.trimming_threshold == 1e-2
assert not dml_apos.normalize_ipw
assert np.array_equal(dml_apos.weights, np.ones((dml_apos._dml_data.n_obs,)))
@@ -226,14 +231,10 @@ def test_apos_defaults():
@pytest.mark.ci
def test_sensitivity_defaults():
- input_dict = {'cf_y': 0.03,
- 'cf_d': 0.03,
- 'rho': 1.0,
- 'level': 0.95,
- 'null_hypothesis': np.array([0.])}
+ input_dict = {"cf_y": 0.03, "cf_d": 0.03, "rho": 1.0, "level": 0.95, "null_hypothesis": np.array([0.0])}
dml_plr.sensitivity_analysis()
- assert dml_plr.sensitivity_params['input'] == input_dict
+ assert dml_plr.sensitivity_params["input"] == input_dict
@pytest.mark.ci
diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py
index 85980f8f0..2ccbe5b2a 100644
--- a/doubleml/tests/test_multiway_cluster.py
+++ b/doubleml/tests/test_multiway_cluster.py
@@ -1,17 +1,16 @@
-import numpy as np
-import pytest
import math
-from sklearn.linear_model import LinearRegression, Lasso
+import numpy as np
+import pytest
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso, LinearRegression
import doubleml as dml
from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+from ..plm.tests._utils_pliv_manual import compute_pliv_residuals, fit_pliv
from ._utils import _clone
-from ._utils_cluster import var_one_way_cluster, est_one_way_cluster_dml2, \
- est_two_way_cluster_dml2, var_two_way_cluster
-from ..plm.tests._utils_pliv_manual import fit_pliv, compute_pliv_residuals
+from ._utils_cluster import est_one_way_cluster_dml2, est_two_way_cluster_dml2, var_one_way_cluster, var_two_way_cluster
np.random.seed(1234)
# Set the simulation parameters
@@ -21,30 +20,32 @@
obj_dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x)
-obj_dml_oneway_cluster_data = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x,
- omega_X=np.array([0.25, 0]),
- omega_epsilon=np.array([0.25, 0]),
- omega_v=np.array([0.25, 0]),
- omega_V=np.array([0.25, 0]))
+obj_dml_oneway_cluster_data = make_pliv_multiway_cluster_CKMS2021(
+ N,
+ M,
+ dim_x,
+ omega_X=np.array([0.25, 0]),
+ omega_epsilon=np.array([0.25, 0]),
+ omega_v=np.array([0.25, 0]),
+ omega_V=np.array([0.25, 0]),
+)
# only the first cluster variable is relevant with the weight setting above
-obj_dml_oneway_cluster_data.cluster_cols = 'cluster_var_i'
+obj_dml_oneway_cluster_data.cluster_cols = "cluster_var_i"
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=2, n_estimators=10),
- LinearRegression(),
- Lasso(alpha=0.1)])
+@pytest.fixture(
+ scope="module", params=[RandomForestRegressor(max_depth=2, n_estimators=10), LinearRegression(), Lasso(alpha=0.1)]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['partialling out', 'IV-type'])
+@pytest.fixture(scope="module", params=["partialling out", "IV-type"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
n_folds = 2
n_rep = 2
@@ -53,32 +54,22 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
ml_l = _clone(learner)
ml_m = _clone(learner)
ml_r = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_cluster_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score)
+ dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_cluster_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, n_rep=n_rep, score=score)
np.random.seed(3141)
dml_pliv_obj.fit()
dml_pliv_obj_ext_smpls = dml.DoubleMLPLIV(
- obj_dml_cluster_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score,
- draw_sample_splitting=False)
+ obj_dml_cluster_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, n_rep=n_rep, score=score, draw_sample_splitting=False
+ )
- dml_pliv_obj_ext_smpls.set_sample_splitting(
- all_smpls=dml_pliv_obj.smpls,
- all_smpls_cluster=dml_pliv_obj.smpls_cluster)
+ dml_pliv_obj_ext_smpls.set_sample_splitting(all_smpls=dml_pliv_obj.smpls, all_smpls_cluster=dml_pliv_obj.smpls_cluster)
np.random.seed(3141)
dml_pliv_obj_ext_smpls.fit()
@@ -89,45 +80,42 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
d = obj_dml_cluster_data.d
z = np.ravel(obj_dml_cluster_data.z)
- res_manual = fit_pliv(y, x, d, z,
- _clone(learner), _clone(learner), _clone(learner), _clone(learner),
- dml_pliv_obj.smpls, score,
- n_rep=n_rep)
+ res_manual = fit_pliv(
+ y, x, d, z, _clone(learner), _clone(learner), _clone(learner), _clone(learner), dml_pliv_obj.smpls, score, n_rep=n_rep
+ )
thetas = np.full(n_rep, np.nan)
ses = np.full(n_rep, np.nan)
for i_rep in range(n_rep):
- l_hat = res_manual['all_l_hat'][i_rep]
- m_hat = res_manual['all_m_hat'][i_rep]
- r_hat = res_manual['all_r_hat'][i_rep]
- g_hat = res_manual['all_g_hat'][i_rep]
+ l_hat = res_manual["all_l_hat"][i_rep]
+ m_hat = res_manual["all_m_hat"][i_rep]
+ r_hat = res_manual["all_r_hat"][i_rep]
+ g_hat = res_manual["all_g_hat"][i_rep]
smpls_one_split = dml_pliv_obj.smpls[i_rep]
y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals(
- y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split)
+ y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split
+ )
- if score == 'partialling out':
+ if score == "partialling out":
psi_a = -np.multiply(z_minus_m_hat, d_minus_r_hat)
psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat)
- theta = est_two_way_cluster_dml2(psi_a, psi_b,
- obj_dml_cluster_data.cluster_vars[:, 0],
- obj_dml_cluster_data.cluster_vars[:, 1],
- smpls_one_split)
+ theta = est_two_way_cluster_dml2(
+ psi_a, psi_b, obj_dml_cluster_data.cluster_vars[:, 0], obj_dml_cluster_data.cluster_vars[:, 1], smpls_one_split
+ )
psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat)
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
psi_a = -np.multiply(z_minus_m_hat, d)
psi_b = np.multiply(z_minus_m_hat, y_minus_g_hat)
- theta = est_two_way_cluster_dml2(psi_a, psi_b,
- obj_dml_cluster_data.cluster_vars[:, 0],
- obj_dml_cluster_data.cluster_vars[:, 1],
- smpls_one_split)
+ theta = est_two_way_cluster_dml2(
+ psi_a, psi_b, obj_dml_cluster_data.cluster_vars[:, 0], obj_dml_cluster_data.cluster_vars[:, 1], smpls_one_split
+ )
psi = np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat)
- var = var_two_way_cluster(psi, psi_a,
- obj_dml_cluster_data.cluster_vars[:, 0],
- obj_dml_cluster_data.cluster_vars[:, 1],
- smpls_one_split)
+ var = var_two_way_cluster(
+ psi, psi_a, obj_dml_cluster_data.cluster_vars[:, 0], obj_dml_cluster_data.cluster_vars[:, 1], smpls_one_split
+ )
se = np.sqrt(var)
thetas[i_rep] = theta
ses[i_rep] = se[0]
@@ -138,37 +126,48 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
var_scaling_factor = min(n_clusters1, n_clusters2)
se = np.sqrt(np.median(np.power(ses, 2) * var_scaling_factor + np.power(thetas - theta, 2)) / var_scaling_factor)
- res_dict = {'coef': dml_pliv_obj.coef,
- 'se': dml_pliv_obj.se,
- 'coef_manual': theta,
- 'se_manual': se,
- 'coef_ext_smpls': dml_pliv_obj_ext_smpls.coef,
- 'se_ext_smpls': dml_pliv_obj_ext_smpls.se}
+ res_dict = {
+ "coef": dml_pliv_obj.coef,
+ "se": dml_pliv_obj.se,
+ "coef_manual": theta,
+ "se_manual": se,
+ "coef_ext_smpls": dml_pliv_obj_ext_smpls.coef,
+ "se_ext_smpls": dml_pliv_obj_ext_smpls.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_pliv_multiway_cluster_coef(dml_pliv_multiway_cluster_fixture):
- assert math.isclose(dml_pliv_multiway_cluster_fixture['coef'][0],
- dml_pliv_multiway_cluster_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_pliv_multiway_cluster_fixture['coef'][0],
- dml_pliv_multiway_cluster_fixture['coef_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_multiway_cluster_fixture["coef"][0],
+ dml_pliv_multiway_cluster_fixture["coef_manual"],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
+ assert math.isclose(
+ dml_pliv_multiway_cluster_fixture["coef"][0],
+ dml_pliv_multiway_cluster_fixture["coef_ext_smpls"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_pliv_multiway_cluster_se(dml_pliv_multiway_cluster_fixture):
- assert math.isclose(dml_pliv_multiway_cluster_fixture['se'][0],
- dml_pliv_multiway_cluster_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_pliv_multiway_cluster_fixture['se'][0],
- dml_pliv_multiway_cluster_fixture['se_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_multiway_cluster_fixture["se"][0], dml_pliv_multiway_cluster_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_pliv_multiway_cluster_fixture["se"][0],
+ dml_pliv_multiway_cluster_fixture["se_ext_smpls"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, score):
n_folds = 3
@@ -176,30 +175,22 @@ def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, score):
ml_l = _clone(learner)
ml_m = _clone(learner)
ml_r = _clone(learner)
- if score == 'IV-type':
+ if score == "IV-type":
ml_g = _clone(learner)
else:
ml_g = None
np.random.seed(3141)
- dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- score=score)
+ dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_oneway_cluster_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, score=score)
np.random.seed(3141)
dml_pliv_obj.fit()
dml_pliv_obj_ext_smpls = dml.DoubleMLPLIV(
- obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_r, ml_g,
- n_folds=n_folds,
- score=score,
- draw_sample_splitting=False)
+ obj_dml_oneway_cluster_data, ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, score=score, draw_sample_splitting=False
+ )
- dml_pliv_obj_ext_smpls.set_sample_splitting(
- all_smpls=dml_pliv_obj.smpls,
- all_smpls_cluster=dml_pliv_obj.smpls_cluster)
+ dml_pliv_obj_ext_smpls.set_sample_splitting(all_smpls=dml_pliv_obj.smpls, all_smpls_cluster=dml_pliv_obj.smpls_cluster)
np.random.seed(3141)
dml_pliv_obj_ext_smpls.fit()
@@ -210,68 +201,71 @@ def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, score):
d = obj_dml_oneway_cluster_data.d
z = np.ravel(obj_dml_oneway_cluster_data.z)
- res_manual = fit_pliv(y, x, d, z,
- _clone(learner), _clone(learner), _clone(learner), _clone(learner),
- dml_pliv_obj.smpls, score)
- l_hat = res_manual['all_l_hat'][0]
- m_hat = res_manual['all_m_hat'][0]
- r_hat = res_manual['all_r_hat'][0]
- g_hat = res_manual['all_g_hat'][0]
+ res_manual = fit_pliv(
+ y, x, d, z, _clone(learner), _clone(learner), _clone(learner), _clone(learner), dml_pliv_obj.smpls, score
+ )
+ l_hat = res_manual["all_l_hat"][0]
+ m_hat = res_manual["all_m_hat"][0]
+ r_hat = res_manual["all_r_hat"][0]
+ g_hat = res_manual["all_g_hat"][0]
smpls_one_split = dml_pliv_obj.smpls[0]
y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals(
- y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split)
+ y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split
+ )
- if score == 'partialling out':
+ if score == "partialling out":
psi_a = -np.multiply(z_minus_m_hat, d_minus_r_hat)
psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat)
- theta = est_one_way_cluster_dml2(psi_a, psi_b,
- obj_dml_oneway_cluster_data.cluster_vars[:, 0],
- smpls_one_split)
+ theta = est_one_way_cluster_dml2(psi_a, psi_b, obj_dml_oneway_cluster_data.cluster_vars[:, 0], smpls_one_split)
psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat)
else:
- assert score == 'IV-type'
+ assert score == "IV-type"
psi_a = -np.multiply(z_minus_m_hat, d)
psi_b = np.multiply(z_minus_m_hat, y_minus_g_hat)
- theta = est_one_way_cluster_dml2(psi_a, psi_b,
- obj_dml_oneway_cluster_data.cluster_vars[:, 0],
- smpls_one_split)
+ theta = est_one_way_cluster_dml2(psi_a, psi_b, obj_dml_oneway_cluster_data.cluster_vars[:, 0], smpls_one_split)
psi = np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat)
- var = var_one_way_cluster(psi, psi_a,
- obj_dml_oneway_cluster_data.cluster_vars[:, 0],
- smpls_one_split)
+ var = var_one_way_cluster(psi, psi_a, obj_dml_oneway_cluster_data.cluster_vars[:, 0], smpls_one_split)
se = np.sqrt(var)
- res_dict = {'coef': dml_pliv_obj.coef,
- 'se': dml_pliv_obj.se,
- 'coef_manual': theta,
- 'se_manual': se,
- 'coef_ext_smpls': dml_pliv_obj_ext_smpls.coef,
- 'se_ext_smpls': dml_pliv_obj_ext_smpls.se}
+ res_dict = {
+ "coef": dml_pliv_obj.coef,
+ "se": dml_pliv_obj.se,
+ "coef_manual": theta,
+ "se_manual": se,
+ "coef_ext_smpls": dml_pliv_obj_ext_smpls.coef,
+ "se_ext_smpls": dml_pliv_obj_ext_smpls.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_pliv_oneway_cluster_coef(dml_pliv_oneway_cluster_fixture):
- assert math.isclose(dml_pliv_oneway_cluster_fixture['coef'][0],
- dml_pliv_oneway_cluster_fixture['coef_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_pliv_oneway_cluster_fixture['coef'][0],
- dml_pliv_oneway_cluster_fixture['coef_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_oneway_cluster_fixture["coef"][0], dml_pliv_oneway_cluster_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_pliv_oneway_cluster_fixture["coef"][0],
+ dml_pliv_oneway_cluster_fixture["coef_ext_smpls"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_pliv_oneway_cluster_se(dml_pliv_oneway_cluster_fixture):
- assert math.isclose(dml_pliv_oneway_cluster_fixture['se'][0],
- dml_pliv_oneway_cluster_fixture['se_manual'],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_pliv_oneway_cluster_fixture['se'][0],
- dml_pliv_oneway_cluster_fixture['se_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_pliv_oneway_cluster_fixture["se"][0], dml_pliv_oneway_cluster_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_pliv_oneway_cluster_fixture["se"][0],
+ dml_pliv_oneway_cluster_fixture["se_ext_smpls"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.fixture(scope="module")
@@ -281,71 +275,61 @@ def dml_plr_cluster_with_index(generate_data1, learner):
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for m & l
ml_l = _clone(learner)
ml_m = _clone(learner)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
np.random.seed(3141)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds=n_folds)
np.random.seed(3141)
dml_plr_obj.fit()
df = data.reset_index()
- dml_cluster_data = dml.DoubleMLClusterData(df,
- y_col='y',
- d_cols='d',
- x_cols=x_cols,
- cluster_cols='index')
+ dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index")
np.random.seed(3141)
- dml_plr_cluster_obj = dml.DoubleMLPLR(dml_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds)
+ dml_plr_cluster_obj = dml.DoubleMLPLR(dml_cluster_data, ml_l, ml_m, n_folds=n_folds)
np.random.seed(3141)
dml_plr_cluster_obj.fit()
- dml_plr_cluster_ext_smpls = dml.DoubleMLPLR(
- dml_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- draw_sample_splitting=False)
+ dml_plr_cluster_ext_smpls = dml.DoubleMLPLR(dml_cluster_data, ml_l, ml_m, n_folds=n_folds, draw_sample_splitting=False)
dml_plr_cluster_ext_smpls.set_sample_splitting(
- all_smpls=dml_plr_cluster_obj.smpls,
- all_smpls_cluster=dml_plr_cluster_obj.smpls_cluster)
+ all_smpls=dml_plr_cluster_obj.smpls, all_smpls_cluster=dml_plr_cluster_obj.smpls_cluster
+ )
np.random.seed(3141)
dml_plr_cluster_ext_smpls.fit()
- res_dict = {'coef': dml_plr_obj.coef,
- 'coef_manual': dml_plr_cluster_obj.coef,
- 'se': dml_plr_obj.se,
- 'se_manual': dml_plr_cluster_obj.se,
- 'coef_ext_smpls': dml_plr_cluster_ext_smpls.coef,
- 'se_ext_smpls': dml_plr_cluster_ext_smpls.se}
+ res_dict = {
+ "coef": dml_plr_obj.coef,
+ "coef_manual": dml_plr_cluster_obj.coef,
+ "se": dml_plr_obj.se,
+ "se_manual": dml_plr_cluster_obj.se,
+ "coef_ext_smpls": dml_plr_cluster_ext_smpls.coef,
+ "se_ext_smpls": dml_plr_cluster_ext_smpls.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_cluster_with_index_coef(dml_plr_cluster_with_index):
- assert math.isclose(dml_plr_cluster_with_index['coef'][0],
- dml_plr_cluster_with_index['coef_manual'][0],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_plr_cluster_with_index['coef'][0],
- dml_plr_cluster_with_index['coef_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_cluster_with_index["coef"][0], dml_plr_cluster_with_index["coef_manual"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_plr_cluster_with_index["coef"][0], dml_plr_cluster_with_index["coef_ext_smpls"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_plr_cluster_with_index_se(dml_plr_cluster_with_index):
- assert math.isclose(dml_plr_cluster_with_index['se'][0],
- dml_plr_cluster_with_index['se_manual'][0],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_plr_cluster_with_index['se'][0],
- dml_plr_cluster_with_index['se_ext_smpls'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_cluster_with_index["se"][0], dml_plr_cluster_with_index["se_manual"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
+ assert math.isclose(
+ dml_plr_cluster_with_index["se"][0], dml_plr_cluster_with_index["se_ext_smpls"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
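
Before the next file: the cluster helpers used above (`var_one_way_cluster`, `est_one_way_cluster_dml2`, and friends) are internal test utilities, but the idea they implement is the standard sandwich estimator with cluster-aggregated scores — within-cluster correlation is preserved by summing the score inside each cluster before squaring. The sketch below is a generic textbook form under that assumption, not the library's exact implementation:

```python
import numpy as np


def one_way_cluster_var(psi, psi_a, cluster_ids):
    """Generic one-way cluster-robust variance for a linear score (sketch)."""
    n = psi.shape[0]
    j_hat = psi_a.mean()  # Jacobian of the linear score psi = psi_a * theta + psi_b
    # aggregate the score within clusters; within-cluster correlation stays intact
    cluster_sums = np.array([psi[cluster_ids == c].sum() for c in np.unique(cluster_ids)])
    # sandwich: Var(theta_hat) ~ (1 / j_hat^2) * (1 / n^2) * sum_c (sum_{i in c} psi_i)^2
    return cluster_sums @ cluster_sums / (j_hat**2 * n**2)
```
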
diff --git a/doubleml/tests/test_nonlinear_cluster.py b/doubleml/tests/test_nonlinear_cluster.py
index 5bcfaace5..f84f3e2e9 100644
--- a/doubleml/tests/test_nonlinear_cluster.py
+++ b/doubleml/tests/test_nonlinear_cluster.py
@@ -1,13 +1,13 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
-from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso, LinearRegression
import doubleml as dml
-from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021, DoubleMLClusterData
+from doubleml.datasets import DoubleMLClusterData, make_pliv_multiway_cluster_CKMS2021
from .test_nonlinear_score_mixin import DoubleMLPLRWithNonLinearScoreMixin
@@ -21,33 +21,35 @@
x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array")
obj_dml_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
-x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x,
- omega_X=np.array([0.25, 0]),
- omega_epsilon=np.array([0.25, 0]),
- omega_v=np.array([0.25, 0]),
- omega_V=np.array([0.25, 0]),
- return_type='array')
+x, y, d, cluster_vars, z = make_pliv_multiway_cluster_CKMS2021(
+ N,
+ M,
+ dim_x,
+ omega_X=np.array([0.25, 0]),
+ omega_epsilon=np.array([0.25, 0]),
+ omega_v=np.array([0.25, 0]),
+ omega_V=np.array([0.25, 0]),
+ return_type="array",
+)
obj_dml_oneway_cluster_data = DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
# only the first cluster variable is relevant with the weight setting above
-obj_dml_oneway_cluster_data.cluster_cols = 'cluster_var1'
+obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1"
-@pytest.fixture(scope='module',
- params=[RandomForestRegressor(max_depth=2, n_estimators=10),
- LinearRegression(),
- Lasso(alpha=0.1)])
+@pytest.fixture(
+ scope="module", params=[RandomForestRegressor(max_depth=2, n_estimators=10), LinearRegression(), Lasso(alpha=0.1)]
+)
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_plr_oneway_cluster_linear_vs_nonlinear_fixture(learner, score):
n_folds = 3
@@ -57,60 +59,60 @@ def dml_plr_oneway_cluster_linear_vs_nonlinear_fixture(learner, score):
ml_g = clone(learner)
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_g,
- n_folds=n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data, ml_l, ml_m, ml_g, n_folds=n_folds, score=score)
np.random.seed(3141)
dml_plr_obj.fit()
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_oneway_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(
+ obj_dml_oneway_cluster_data, ml_l, ml_m, n_folds=n_folds, score=score
+ )
else:
- assert score == 'IV-type'
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_g,
- n_folds=n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(
+ obj_dml_oneway_cluster_data, ml_l, ml_m, ml_g, n_folds=n_folds, score=score
+ )
np.random.seed(3141)
dml_plr_obj2.fit()
- res_dict = {'coef_linear': dml_plr_obj.coef,
- 'coef_nonlinear': dml_plr_obj2.coef,
- 'se_linear': dml_plr_obj.se,
- 'se_nonlinear': dml_plr_obj2.se}
+ res_dict = {
+ "coef_linear": dml_plr_obj.coef,
+ "coef_nonlinear": dml_plr_obj2.coef,
+ "se_linear": dml_plr_obj.se,
+ "se_nonlinear": dml_plr_obj2.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_oneway_cluster_linear_vs_nonlinear_coef(dml_plr_oneway_cluster_linear_vs_nonlinear_fixture):
- assert math.isclose(dml_plr_oneway_cluster_linear_vs_nonlinear_fixture['coef_linear'][0],
- dml_plr_oneway_cluster_linear_vs_nonlinear_fixture['coef_nonlinear'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_oneway_cluster_linear_vs_nonlinear_fixture["coef_linear"][0],
+ dml_plr_oneway_cluster_linear_vs_nonlinear_fixture["coef_nonlinear"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_plr_oneway_cluster_linear_vs_nonlinear_se(dml_plr_oneway_cluster_linear_vs_nonlinear_fixture):
- assert math.isclose(dml_plr_oneway_cluster_linear_vs_nonlinear_fixture['se_linear'][0],
- dml_plr_oneway_cluster_linear_vs_nonlinear_fixture['se_nonlinear'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_oneway_cluster_linear_vs_nonlinear_fixture["se_linear"][0],
+ dml_plr_oneway_cluster_linear_vs_nonlinear_fixture["se_nonlinear"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_plr_multiway_cluster_linear_vs_nonlinear_fixture(learner, score):
n_folds = 2
n_rep = 2
@@ -121,61 +123,57 @@ def dml_plr_multiway_cluster_linear_vs_nonlinear_fixture(learner, score):
ml_g = clone(learner)
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data, ml_l, ml_m, n_folds=n_folds, n_rep=n_rep, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_g,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_oneway_cluster_data, ml_l, ml_m, ml_g, n_folds=n_folds, n_rep=n_rep, score=score)
np.random.seed(3141)
dml_plr_obj.fit()
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_oneway_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(
+ obj_dml_oneway_cluster_data, ml_l, ml_m, n_folds=n_folds, n_rep=n_rep, score=score
+ )
else:
- assert score == 'IV-type'
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_oneway_cluster_data,
- ml_l, ml_m, ml_g,
- n_folds=n_folds,
- n_rep=n_rep,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(
+ obj_dml_oneway_cluster_data, ml_l, ml_m, ml_g, n_folds=n_folds, n_rep=n_rep, score=score
+ )
np.random.seed(3141)
dml_plr_obj2.fit()
- res_dict = {'coef_linear': dml_plr_obj.coef,
- 'coef_nonlinear': dml_plr_obj2.coef,
- 'se_linear': dml_plr_obj.se,
- 'se_nonlinear': dml_plr_obj2.se}
+ res_dict = {
+ "coef_linear": dml_plr_obj.coef,
+ "coef_nonlinear": dml_plr_obj2.coef,
+ "se_linear": dml_plr_obj.se,
+ "se_nonlinear": dml_plr_obj2.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_multiway_cluster_linear_vs_nonlinear_coef(dml_plr_multiway_cluster_linear_vs_nonlinear_fixture):
- assert math.isclose(dml_plr_multiway_cluster_linear_vs_nonlinear_fixture['coef_linear'][0],
- dml_plr_multiway_cluster_linear_vs_nonlinear_fixture['coef_nonlinear'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_multiway_cluster_linear_vs_nonlinear_fixture["coef_linear"][0],
+ dml_plr_multiway_cluster_linear_vs_nonlinear_fixture["coef_nonlinear"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_plr_multiway_cluster_linear_vs_nonlinear_se(dml_plr_multiway_cluster_linear_vs_nonlinear_fixture):
- assert math.isclose(dml_plr_multiway_cluster_linear_vs_nonlinear_fixture['se_linear'][0],
- dml_plr_multiway_cluster_linear_vs_nonlinear_fixture['se_nonlinear'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_multiway_cluster_linear_vs_nonlinear_fixture["se_linear"][0],
+ dml_plr_multiway_cluster_linear_vs_nonlinear_fixture["se_nonlinear"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.fixture(scope="module")
@@ -185,48 +183,48 @@ def dml_plr_cluster_nonlinear_with_index(generate_data1, learner):
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for m & l
ml_l = clone(learner)
ml_m = clone(learner)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
np.random.seed(3141)
- dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds)
+ dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, ml_l, ml_m, n_folds=n_folds)
dml_plr_obj.fit()
df = data.reset_index()
- dml_cluster_data = dml.DoubleMLClusterData(df,
- y_col='y',
- d_cols='d',
- x_cols=x_cols,
- cluster_cols='index')
+ dml_cluster_data = dml.DoubleMLClusterData(df, y_col="y", d_cols="d", x_cols=x_cols, cluster_cols="index")
np.random.seed(3141)
- dml_plr_cluster_obj = DoubleMLPLRWithNonLinearScoreMixin(dml_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds)
+ dml_plr_cluster_obj = DoubleMLPLRWithNonLinearScoreMixin(dml_cluster_data, ml_l, ml_m, n_folds=n_folds)
dml_plr_cluster_obj.fit()
- res_dict = {'coef': dml_plr_obj.coef,
- 'coef_cluster': dml_plr_cluster_obj.coef,
- 'se': dml_plr_obj.se,
- 'se_cluster': dml_plr_cluster_obj.se}
+ res_dict = {
+ "coef": dml_plr_obj.coef,
+ "coef_cluster": dml_plr_cluster_obj.coef,
+ "se": dml_plr_obj.se,
+ "se_cluster": dml_plr_cluster_obj.se,
+ }
return res_dict
@pytest.mark.ci
def test_dml_plr_cluster_nonlinear_with_index_coef(dml_plr_cluster_nonlinear_with_index):
- assert math.isclose(dml_plr_cluster_nonlinear_with_index['coef'][0],
- dml_plr_cluster_nonlinear_with_index['coef_cluster'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_cluster_nonlinear_with_index["coef"][0],
+ dml_plr_cluster_nonlinear_with_index["coef_cluster"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_plr_cluster_nonlinear_with_index_se(dml_plr_cluster_nonlinear_with_index):
- assert math.isclose(dml_plr_cluster_nonlinear_with_index['se'][0],
- dml_plr_cluster_nonlinear_with_index['se_cluster'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_cluster_nonlinear_with_index["se"][0],
+ dml_plr_cluster_nonlinear_with_index["se_cluster"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
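The cluster tests above all assert that the closed-form estimate from the linear score and the numerically solved estimate from `DoubleMLPLRWithNonLinearScoreMixin` coincide. The identity they rely on can be checked in isolation; a minimal standalone sketch (illustrative only, with synthetic `psi_a`/`psi_b`, not part of this patch):

```python
# For a linear score psi(theta) = psi_a * theta + psi_b, the closed-form DML
# solution theta = -mean(psi_b) / mean(psi_a) is exactly the root of
# theta -> mean(psi(theta)), which is what the nonlinear mixin searches for.
import numpy as np
from scipy.optimize import brentq

rng = np.random.default_rng(3141)
psi_a = -np.abs(rng.normal(size=1000)) - 0.1  # strictly negative, as for "partialling out"
psi_b = rng.normal(loc=0.5, size=1000)

theta_closed_form = -np.mean(psi_b) / np.mean(psi_a)
theta_root = brentq(lambda theta: np.mean(psi_a * theta + psi_b), -10.0, 10.0)

assert np.isclose(theta_closed_form, theta_root, rtol=1e-9, abs_tol=0.0) if False else True
assert np.isclose(theta_closed_form, theta_root)
```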
diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py
index e59dc7f01..d4e9a6950 100644
--- a/doubleml/tests/test_nonlinear_score_mixin.py
+++ b/doubleml/tests/test_nonlinear_score_mixin.py
@@ -1,141 +1,141 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.base import clone
from sklearn.linear_model import LinearRegression
from sklearn.utils import check_X_y
import doubleml as dml
from doubleml.double_ml import DoubleML
-from doubleml.utils._estimation import _dml_cv_predict
-from doubleml.utils._checks import _check_finite_predictions
from doubleml.double_ml_score_mixins import NonLinearScoreMixin
+from doubleml.utils._checks import _check_finite_predictions
+from doubleml.utils._estimation import _dml_cv_predict
class DoubleMLPLRWithNonLinearScoreMixin(NonLinearScoreMixin, DoubleML):
_coef_bounds = (-np.inf, np.inf)
_coef_start_val = 3.0
- def __init__(self,
- obj_dml_data,
- ml_l,
- ml_m,
- ml_g=None,
- n_folds=5,
- n_rep=1,
- score='partialling out',
- draw_sample_splitting=True):
- super().__init__(obj_dml_data,
- n_folds,
- n_rep,
- score,
- draw_sample_splitting)
+ def __init__(
+ self, obj_dml_data, ml_l, ml_m, ml_g=None, n_folds=5, n_rep=1, score="partialling out", draw_sample_splitting=True
+ ):
+ super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
self._check_data(self._dml_data)
self._check_score(self.score)
- _ = self._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
- _ = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False)
- self._learner = {'ml_l': ml_l, 'ml_m': ml_m}
- self._predict_method = {'ml_l': 'predict',
- 'ml_m': 'predict'}
+ _ = self._check_learner(ml_l, "ml_l", regressor=True, classifier=False)
+ _ = self._check_learner(ml_m, "ml_m", regressor=True, classifier=False)
+ self._learner = {"ml_l": ml_l, "ml_m": ml_m}
+ self._predict_method = {"ml_l": "predict", "ml_m": "predict"}
if ml_g is not None:
- _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
- self._learner['ml_g'] = ml_g
- self._predict_method['ml_g'] = 'predict'
+ _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False)
+ self._learner["ml_g"] = ml_g
+ self._predict_method["ml_g"] = "predict"
self._initialize_ml_nuisance_params()
@property
def _score_element_names(self):
- return ['psi_a', 'psi_b']
+ return ["psi_a", "psi_b"]
def _compute_score(self, psi_elements, coef, inds=None):
- psi_a = psi_elements['psi_a']
- psi_b = psi_elements['psi_b']
+ psi_a = psi_elements["psi_a"]
+ psi_b = psi_elements["psi_b"]
if inds is not None:
psi_a = psi_a[inds]
psi_b = psi_b[inds]
psi = psi_a * coef + psi_b
- if self.score == 'no_root_pos':
- psi = np.full_like(psi, coef**2. + 0.05)
- elif self.score == 'no_root_neg':
- psi = np.full_like(psi, -coef**2. - 0.75)
+ if self.score == "no_root_pos":
+ psi = np.full_like(psi, coef**2.0 + 0.05)
+ elif self.score == "no_root_neg":
+ psi = np.full_like(psi, -(coef**2.0) - 0.75)
return psi
def _compute_score_deriv(self, psi_elements, coef, inds=None):
- psi_a = psi_elements['psi_a']
+ psi_a = psi_elements["psi_a"]
if inds is not None:
psi_a = psi_a[inds]
- if self.score == 'no_root_pos':
- psi_a = np.full_like(psi_a, 2. * coef)
- elif self.score == 'no_root_neg':
- psi_a = np.full_like(psi_a, -2. * coef)
+ if self.score == "no_root_pos":
+ psi_a = np.full_like(psi_a, 2.0 * coef)
+ elif self.score == "no_root_neg":
+ psi_a = np.full_like(psi_a, -2.0 * coef)
return psi_a
def _initialize_ml_nuisance_params(self):
- self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols}
- for learner in self._learner}
+ self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in self._learner}
def _check_score(self, score):
if isinstance(score, str):
- valid_score = ['IV-type', 'partialling out', 'no_root_pos', 'no_root_neg']
+ valid_score = ["IV-type", "partialling out", "no_root_pos", "no_root_neg"]
if score not in valid_score:
- raise ValueError('Invalid score ' + score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + score + ". " + "Valid score " + " or ".join(valid_score) + ".")
else:
if not callable(score):
- raise TypeError('score should be either a string or a callable. '
- '%r was passed.' % score)
+ raise TypeError("score should be either a string or a callable. %r was passed." % score)
return
def _check_data(self, obj_dml_data):
pass
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
- x, y = check_X_y(self._dml_data.x, self._dml_data.y,
- force_all_finite=False)
- x, d = check_X_y(x, self._dml_data.d,
- force_all_finite=False)
+ x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+ x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
# nuisance l
- l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'])
- _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls)
+ l_hat = _dml_cv_predict(
+ self._learner["ml_l"],
+ x,
+ y,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_l"),
+ method=self._predict_method["ml_l"],
+ )
+ _check_finite_predictions(l_hat["preds"], self._learner["ml_l"], "ml_l", smpls)
# nuisance m
- m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'])
- _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls)
+ m_hat = _dml_cv_predict(
+ self._learner["ml_m"],
+ x,
+ d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_m"),
+ method=self._predict_method["ml_m"],
+ )
+ _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
# an estimate of g is obtained for the IV-type score and callable scores
- g_hat = {'preds': None, 'targets': None, 'models': None}
- if 'ml_g' in self._learner:
+ g_hat = {"preds": None, "targets": None, "models": None}
+ if "ml_g" in self._learner:
# get an initial estimate for theta using the partialling out score
- psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds'])
- psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds'])
+ psi_a = -np.multiply(d - m_hat["preds"], d - m_hat["preds"])
+ psi_b = np.multiply(d - m_hat["preds"], y - l_hat["preds"])
theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a)
# nuisance g
- g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv,
- est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'])
- _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls)
-
- psi_a, psi_b = self._score_elements(y, d, l_hat['preds'], m_hat['preds'], g_hat['preds'], smpls)
- psi_elements = {'psi_a': psi_a,
- 'psi_b': psi_b}
- preds = {'predictions': {'ml_l': l_hat['preds'],
- 'ml_m': m_hat['preds'],
- 'ml_g': g_hat['preds']},
- 'targets': {'ml_l': l_hat['targets'],
- 'ml_m': m_hat['targets'],
- 'ml_g': g_hat['targets']},
- 'models': {'ml_l': l_hat['models'],
- 'ml_m': m_hat['models'],
- 'ml_g': g_hat['models']}}
+ g_hat = _dml_cv_predict(
+ self._learner["ml_g"],
+ x,
+ y - theta_initial * d,
+ smpls=smpls,
+ n_jobs=n_jobs_cv,
+ est_params=self._get_params("ml_g"),
+ method=self._predict_method["ml_g"],
+ )
+ _check_finite_predictions(g_hat["preds"], self._learner["ml_g"], "ml_g", smpls)
+
+ psi_a, psi_b = self._score_elements(y, d, l_hat["preds"], m_hat["preds"], g_hat["preds"], smpls)
+ psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
+ preds = {
+ "predictions": {"ml_l": l_hat["preds"], "ml_m": m_hat["preds"], "ml_g": g_hat["preds"]},
+ "targets": {"ml_l": l_hat["targets"], "ml_m": m_hat["targets"], "ml_g": g_hat["targets"]},
+ "models": {"ml_l": l_hat["models"], "ml_m": m_hat["models"], "ml_g": g_hat["models"]},
+ }
return psi_elements, preds
@@ -144,42 +144,39 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls):
u_hat = y - l_hat
v_hat = d - m_hat
- if self.score == 'IV-type':
- psi_a = - np.multiply(v_hat, d)
+ if self.score == "IV-type":
+ psi_a = -np.multiply(v_hat, d)
psi_b = np.multiply(v_hat, y - g_hat)
- elif self.score == 'partialling out':
+ elif self.score == "partialling out":
psi_a = -np.multiply(v_hat, v_hat)
psi_b = np.multiply(v_hat, u_hat)
else:
- assert self.score in ['no_root_pos', 'no_root_neg']
- psi_a = 1.
- psi_b = 1.
+ assert self.score in ["no_root_pos", "no_root_neg"]
+ psi_a = 1.0
+ psi_b = 1.0
return psi_a, psi_b
- def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv,
- search_mode, n_iter_randomized_search):
+ def _nuisance_tuning(
+ self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+ ):
pass
def _sensitivity_element_est(self, preds):
pass
-@pytest.fixture(scope='module',
- params=[LinearRegression()])
+@pytest.fixture(scope="module", params=[LinearRegression()])
def learner(request):
return request.param
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module',
- params=[(-np.inf, np.inf),
- (0, 5)])
+@pytest.fixture(scope="module", params=[(-np.inf, np.inf), (0, 5)])
def coef_bounds(request):
return request.param
@@ -190,7 +187,7 @@ def dml_plr_w_nonlinear_mixin_fixture(generate_data1, learner, score, coef_bound
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
# Set machine learning methods for l, m & g
ml_l = clone(learner)
@@ -198,80 +195,61 @@ def dml_plr_w_nonlinear_mixin_fixture(generate_data1, learner, score, coef_bound
ml_g = clone(learner)
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
dml_plr_obj.fit()
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data,
- ml_l, ml_m, ml_g,
- n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj2 = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, ml_l, ml_m, ml_g, n_folds, score=score)
dml_plr_obj2._coef_bounds = coef_bounds # use different settings to also unit test the solver for bounded problems
dml_plr_obj2.fit()
- res_dict = {'coef': dml_plr_obj.coef,
- 'coef2': dml_plr_obj2.coef,
- 'se': dml_plr_obj.se,
- 'se2': dml_plr_obj2.se}
+ res_dict = {"coef": dml_plr_obj.coef, "coef2": dml_plr_obj2.coef, "se": dml_plr_obj.se, "se2": dml_plr_obj2.se}
return res_dict
@pytest.mark.ci
def test_dml_plr_coef(dml_plr_w_nonlinear_mixin_fixture):
- assert math.isclose(dml_plr_w_nonlinear_mixin_fixture['coef'][0],
- dml_plr_w_nonlinear_mixin_fixture['coef2'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_w_nonlinear_mixin_fixture["coef"][0], dml_plr_w_nonlinear_mixin_fixture["coef2"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_dml_plr_se(dml_plr_w_nonlinear_mixin_fixture):
- assert math.isclose(dml_plr_w_nonlinear_mixin_fixture['se'][0],
- dml_plr_w_nonlinear_mixin_fixture['se2'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_w_nonlinear_mixin_fixture["se"][0], dml_plr_w_nonlinear_mixin_fixture["se2"][0], rel_tol=1e-9, abs_tol=1e-4
+ )
@pytest.mark.ci
def test_nonlinear_warnings(generate_data1, coef_bounds):
# collect data
data = generate_data1
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
+ obj_dml_data = dml.DoubleMLData(data, "y", ["d"], x_cols)
- dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data,
- LinearRegression(), LinearRegression(),
- score='no_root_pos')
- msg = 'Could not find a root of the score function.'
+ dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_pos")
+ msg = "Could not find a root of the score function."
with pytest.warns(UserWarning, match=msg):
dml_plr_obj._coef_bounds = coef_bounds
dml_plr_obj.fit()
- dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data,
- LinearRegression(), LinearRegression(),
- score='no_root_neg')
- msg = 'Could not find a root of the score function.'
+ dml_plr_obj = DoubleMLPLRWithNonLinearScoreMixin(obj_dml_data, LinearRegression(), LinearRegression(), score="no_root_neg")
+ msg = "Could not find a root of the score function."
with pytest.warns(UserWarning, match=msg):
dml_plr_obj._coef_bounds = coef_bounds
dml_plr_obj.fit()
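The `no_root_pos` and `no_root_neg` scores defined in the mixin subclass exist purely to exercise the warning path asserted in `test_nonlinear_warnings`: they are bounded away from zero, so the estimating equation has no solution. A small sketch of why (illustrative only, using the same polynomial as the test class):

```python
# The "no_root_pos" score replaces psi with theta**2 + 0.05, which is bounded
# below by 0.05. mean(psi(theta)) = 0 therefore has no solution, so any root
# finder must fail -- triggering the UserWarning the tests match on.
import numpy as np

thetas = np.linspace(-5.0, 5.0, 1001)
psi = thetas**2 + 0.05
assert psi.min() > 0  # no sign change anywhere => no root to be found
```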
diff --git a/doubleml/tests/test_package.py b/doubleml/tests/test_package.py
index 5bfb52e5b..86b3bf0ea 100644
--- a/doubleml/tests/test_package.py
+++ b/doubleml/tests/test_package.py
@@ -4,4 +4,5 @@
@pytest.mark.ci
def test_version_is_string():
import doubleml
+
assert isinstance(doubleml.__version__, str)
diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py
index a9014d089..c4a725fc0 100644
--- a/doubleml/tests/test_return_types.py
+++ b/doubleml/tests/test_return_types.py
@@ -1,39 +1,38 @@
-import pytest
-import pandas as pd
import numpy as np
+import pandas as pd
import plotly
+import pytest
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import Lasso, LogisticRegression
+from sklearn.svm import LinearSVR
from doubleml import (
- DoubleMLPLR,
- DoubleMLIRM,
- DoubleMLIIVM,
- DoubleMLPLIV,
- DoubleMLData,
+ DoubleMLAPO,
DoubleMLClusterData,
DoubleMLCVAR,
- DoubleMLPQ,
- DoubleMLLPQ,
+ DoubleMLData,
DoubleMLDID,
DoubleMLDIDCS,
- DoubleMLPolicyTree,
DoubleMLFramework,
+ DoubleMLIIVM,
+ DoubleMLIRM,
+ DoubleMLLPQ,
+ DoubleMLPLIV,
+ DoubleMLPLR,
+ DoubleMLPolicyTree,
+ DoubleMLPQ,
DoubleMLSSM,
- DoubleMLAPO
)
from doubleml.datasets import (
- make_plr_CCDDHNR2018,
+ make_did_SZ2020,
+ make_iivm_data,
make_irm_data,
make_pliv_CHS2015,
- make_iivm_data,
make_pliv_multiway_cluster_CKMS2021,
- make_did_SZ2020,
+ make_plr_CCDDHNR2018,
make_ssm_data,
)
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.svm import LinearSVR
-
np.random.seed(3141)
n_obs = 200
dml_data_plr = make_plr_CCDDHNR2018(n_obs=n_obs)
@@ -43,7 +42,7 @@
dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10)
dml_data_did = make_did_SZ2020(n_obs=n_obs)
dml_data_did_cs = make_did_SZ2020(n_obs=n_obs, cross_sectional_data=True)
-(x, y, d, t) = make_did_SZ2020(n_obs=n_obs, cross_sectional_data=True, return_type='array')
+(x, y, d, t) = make_did_SZ2020(n_obs=n_obs, cross_sectional_data=True, return_type="array")
binary_outcome = np.random.binomial(n=1, p=0.5, size=n_obs)
dml_data_did_binary_outcome = DoubleMLData.from_arrays(x, binary_outcome, d)
dml_data_did_cs_binary_outcome = DoubleMLData.from_arrays(x, binary_outcome, d, t=t)
@@ -66,21 +65,25 @@
@pytest.mark.ci
-@pytest.mark.parametrize('dml_obj, cls',
- [(dml_plr, DoubleMLPLR),
- (dml_pliv, DoubleMLPLIV),
- (dml_irm, DoubleMLIRM),
- (dml_iivm, DoubleMLIIVM),
- (dml_pliv_cluster, DoubleMLPLIV),
- (dml_cvar, DoubleMLCVAR),
- (dml_pq, DoubleMLPQ),
- (dml_lpq, DoubleMLLPQ),
- (dml_did, DoubleMLDID),
- (dml_did_binary_outcome, DoubleMLDID),
- (dml_did_cs, DoubleMLDIDCS),
- (dml_did_cs_binary_outcome, DoubleMLDIDCS),
- (dml_ssm, DoubleMLSSM),
- (dml_apo, DoubleMLAPO)])
+@pytest.mark.parametrize(
+ "dml_obj, cls",
+ [
+ (dml_plr, DoubleMLPLR),
+ (dml_pliv, DoubleMLPLIV),
+ (dml_irm, DoubleMLIRM),
+ (dml_iivm, DoubleMLIIVM),
+ (dml_pliv_cluster, DoubleMLPLIV),
+ (dml_cvar, DoubleMLCVAR),
+ (dml_pq, DoubleMLPQ),
+ (dml_lpq, DoubleMLLPQ),
+ (dml_did, DoubleMLDID),
+ (dml_did_binary_outcome, DoubleMLDID),
+ (dml_did_cs, DoubleMLDIDCS),
+ (dml_did_cs_binary_outcome, DoubleMLDIDCS),
+ (dml_ssm, DoubleMLSSM),
+ (dml_apo, DoubleMLAPO),
+ ],
+)
def test_return_types(dml_obj, cls):
# ToDo: A second test case with multiple treatment variables would be helpful
assert isinstance(dml_obj.__str__(), str)
@@ -101,13 +104,13 @@ def test_return_types(dml_obj, cls):
if not dml_obj._is_cluster_data:
assert isinstance(dml_obj.p_adjust(), pd.DataFrame)
else:
- isinstance(dml_obj.p_adjust('bonferroni'), pd.DataFrame)
+ assert isinstance(dml_obj.p_adjust("bonferroni"), pd.DataFrame)
if isinstance(dml_obj, DoubleMLLPQ):
- assert isinstance(dml_obj.get_params('ml_m_z'), dict)
+ assert isinstance(dml_obj.get_params("ml_m_z"), dict)
elif isinstance(dml_obj, DoubleMLSSM):
- assert isinstance(dml_obj.get_params('ml_g_d0'), dict)
+ assert isinstance(dml_obj.get_params("ml_g_d0"), dict)
else:
- assert isinstance(dml_obj.get_params('ml_m'), dict)
+ assert isinstance(dml_obj.get_params("ml_m"), dict)
assert isinstance(dml_obj._dml_data.__str__(), str)
# for the following checks we need additional inputs
@@ -121,66 +124,61 @@ def test_return_types(dml_obj, cls):
n_obs = 200
n_rep_boot = 314
-plr_obj = DoubleMLPLR(dml_data_plr, Lasso(), LinearSVR(),
- n_rep=n_rep, n_folds=n_folds)
+plr_obj = DoubleMLPLR(dml_data_plr, Lasso(), LinearSVR(), n_rep=n_rep, n_folds=n_folds)
plr_obj.fit(store_models=True)
plr_obj.bootstrap(n_rep_boot=n_rep_boot)
-pliv_obj = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(),
- n_rep=n_rep, n_folds=n_folds)
+pliv_obj = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(), n_rep=n_rep, n_folds=n_folds)
pliv_obj.fit()
pliv_obj.bootstrap(n_rep_boot=n_rep_boot)
-irm_obj = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- n_rep=n_rep, n_folds=n_folds, trimming_threshold=0.1)
+irm_obj = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), n_rep=n_rep, n_folds=n_folds, trimming_threshold=0.1)
irm_obj.fit()
irm_obj.bootstrap(n_rep_boot=n_rep_boot)
-iivm_obj = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- n_rep=n_rep, n_folds=n_folds)
+iivm_obj = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), n_rep=n_rep, n_folds=n_folds)
iivm_obj.fit()
iivm_obj.bootstrap(n_rep_boot=n_rep_boot)
-cvar_obj = DoubleMLCVAR(dml_data_irm, ml_g=RandomForestRegressor(), ml_m=RandomForestClassifier(),
- n_rep=n_rep, n_folds=n_folds)
+cvar_obj = DoubleMLCVAR(
+ dml_data_irm, ml_g=RandomForestRegressor(), ml_m=RandomForestClassifier(), n_rep=n_rep, n_folds=n_folds
+)
cvar_obj.fit()
cvar_obj.bootstrap(n_rep_boot=n_rep_boot)
-pq_obj = DoubleMLPQ(dml_data_irm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier(),
- n_rep=n_rep, n_folds=n_folds)
+pq_obj = DoubleMLPQ(dml_data_irm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier(), n_rep=n_rep, n_folds=n_folds)
pq_obj.fit()
pq_obj.bootstrap(n_rep_boot=n_rep_boot)
-lpq_obj = DoubleMLLPQ(dml_data_iivm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier(),
- n_rep=n_rep, n_folds=n_folds)
+lpq_obj = DoubleMLLPQ(
+ dml_data_iivm, ml_g=RandomForestClassifier(), ml_m=RandomForestClassifier(), n_rep=n_rep, n_folds=n_folds
+)
lpq_obj.fit()
lpq_obj.bootstrap(n_rep_boot=n_rep_boot)
-did_obj = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(),
- n_rep=n_rep, n_folds=n_folds)
+did_obj = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), n_rep=n_rep, n_folds=n_folds)
did_obj.fit()
did_obj.bootstrap(n_rep_boot=n_rep_boot)
-did_cs_obj = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(),
- n_rep=n_rep, n_folds=n_folds)
+did_cs_obj = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), n_rep=n_rep, n_folds=n_folds)
did_cs_obj.fit()
did_cs_obj.bootstrap(n_rep_boot=n_rep_boot)
-ssm_obj = DoubleMLSSM(dml_data_ssm, ml_g=Lasso(), ml_m=LogisticRegression(), ml_pi=LogisticRegression(),
- n_rep=n_rep, n_folds=n_folds)
+ssm_obj = DoubleMLSSM(
+ dml_data_ssm, ml_g=Lasso(), ml_m=LogisticRegression(), ml_pi=LogisticRegression(), n_rep=n_rep, n_folds=n_folds
+)
ssm_obj.fit()
ssm_obj.bootstrap(n_rep_boot=n_rep_boot)
-apo_obj = DoubleMLAPO(dml_data_irm, Lasso(), LogisticRegression(), treatment_level=0,
- n_rep=n_rep, n_folds=n_folds)
+apo_obj = DoubleMLAPO(dml_data_irm, Lasso(), LogisticRegression(), treatment_level=0, n_rep=n_rep, n_folds=n_folds)
apo_obj.fit()
apo_obj.bootstrap(n_rep_boot=n_rep_boot)
@pytest.mark.ci
-@pytest.mark.parametrize('dml_obj',
- [plr_obj, pliv_obj, irm_obj, iivm_obj, cvar_obj, pq_obj, lpq_obj,
- did_obj, did_cs_obj, ssm_obj, apo_obj])
+@pytest.mark.parametrize(
+ "dml_obj", [plr_obj, pliv_obj, irm_obj, iivm_obj, cvar_obj, pq_obj, lpq_obj, did_obj, did_cs_obj, ssm_obj, apo_obj]
+)
def test_property_types_and_shapes(dml_obj):
# not checked: learner, learner_names, params, params_names, score
# already checked: summary
@@ -202,10 +200,14 @@ def test_property_types_and_shapes(dml_obj):
assert dml_obj.boot_t_stat.shape == (n_rep_boot, n_treat, n_rep)
assert isinstance(dml_obj.coef, np.ndarray)
- assert dml_obj.coef.shape == (n_treat, )
+ assert dml_obj.coef.shape == (n_treat,)
assert isinstance(dml_obj.psi, np.ndarray)
- assert dml_obj.psi.shape == (n_obs, n_rep, n_treat, )
+ assert dml_obj.psi.shape == (
+ n_obs,
+ n_rep,
+ n_treat,
+ )
assert isinstance(dml_obj.framework, DoubleMLFramework)
@@ -213,22 +215,34 @@ def test_property_types_and_shapes(dml_obj):
if is_nonlinear:
for score_element in dml_obj._score_element_names:
assert isinstance(dml_obj.psi_elements[score_element], np.ndarray)
- assert dml_obj.psi_elements[score_element].shape == (n_obs, n_rep, n_treat, )
+ assert dml_obj.psi_elements[score_element].shape == (
+ n_obs,
+ n_rep,
+ n_treat,
+ )
else:
- assert isinstance(dml_obj.psi_elements['psi_a'], np.ndarray)
- assert dml_obj.psi_elements['psi_a'].shape == (n_obs, n_rep, n_treat, )
-
- assert isinstance(dml_obj.psi_elements['psi_b'], np.ndarray)
- assert dml_obj.psi_elements['psi_b'].shape == (n_obs, n_rep, n_treat, )
+ assert isinstance(dml_obj.psi_elements["psi_a"], np.ndarray)
+ assert dml_obj.psi_elements["psi_a"].shape == (
+ n_obs,
+ n_rep,
+ n_treat,
+ )
+
+ assert isinstance(dml_obj.psi_elements["psi_b"], np.ndarray)
+ assert dml_obj.psi_elements["psi_b"].shape == (
+ n_obs,
+ n_rep,
+ n_treat,
+ )
assert isinstance(dml_obj.pval, np.ndarray)
- assert dml_obj.pval.shape == (n_treat, )
+ assert dml_obj.pval.shape == (n_treat,)
assert isinstance(dml_obj.se, np.ndarray)
- assert dml_obj.se.shape == (n_treat, )
+ assert dml_obj.se.shape == (n_treat,)
assert isinstance(dml_obj.t_stat, np.ndarray)
- assert dml_obj.t_stat.shape == (n_treat, )
+ assert dml_obj.t_stat.shape == (n_treat,)
assert isinstance(dml_obj._dml_data.binary_treats, pd.Series)
assert len(dml_obj._dml_data.binary_treats) == n_treat
@@ -246,181 +260,181 @@ def test_property_types_and_shapes(dml_obj):
@pytest.mark.ci
def test_stored_models():
- assert len(plr_obj.models['ml_l']['d']) == n_rep
- assert len(plr_obj.models['ml_m']['d']) == n_rep
+ assert len(plr_obj.models["ml_l"]["d"]) == n_rep
+ assert len(plr_obj.models["ml_m"]["d"]) == n_rep
- n_folds_each_model = np.array([len(mdl) for mdl in plr_obj.models['ml_l']['d']])
+ n_folds_each_model = np.array([len(mdl) for mdl in plr_obj.models["ml_l"]["d"]])
assert np.all(n_folds_each_model == n_folds_each_model[0])
assert n_folds_each_model[0] == n_folds
- n_folds_each_model = np.array([len(mdl) for mdl in plr_obj.models['ml_m']['d']])
+ n_folds_each_model = np.array([len(mdl) for mdl in plr_obj.models["ml_m"]["d"]])
assert np.all(n_folds_each_model == n_folds_each_model[0])
assert n_folds_each_model[0] == n_folds
- assert np.all([isinstance(mdl, plr_obj.learner['ml_l'].__class__) for mdl in plr_obj.models['ml_l']['d'][0]])
- assert np.all([isinstance(mdl, plr_obj.learner['ml_m'].__class__) for mdl in plr_obj.models['ml_m']['d'][0]])
+ assert np.all([isinstance(mdl, plr_obj.learner["ml_l"].__class__) for mdl in plr_obj.models["ml_l"]["d"][0]])
+ assert np.all([isinstance(mdl, plr_obj.learner["ml_m"].__class__) for mdl in plr_obj.models["ml_m"]["d"][0]])
# extend these tests to more models
@pytest.mark.ci
def test_stored_predictions():
- assert plr_obj.predictions['ml_l'].shape == (n_obs, n_rep, n_treat)
- assert plr_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert plr_obj.predictions["ml_l"].shape == (n_obs, n_rep, n_treat)
+ assert plr_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.predictions['ml_l'].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.predictions['ml_r'].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.predictions["ml_l"].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.predictions["ml_r"].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.predictions['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.predictions['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.predictions["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.predictions["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.predictions['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.predictions['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.predictions['ml_r0'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.predictions['ml_r1'].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.predictions["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.predictions["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.predictions["ml_r0"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.predictions["ml_r1"].shape == (n_obs, n_rep, n_treat)
- assert cvar_obj.predictions['ml_g'].shape == (n_obs, n_rep, n_treat)
- assert cvar_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert cvar_obj.predictions["ml_g"].shape == (n_obs, n_rep, n_treat)
+ assert cvar_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert pq_obj.predictions['ml_g'].shape == (n_obs, n_rep, n_treat)
- assert pq_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert pq_obj.predictions["ml_g"].shape == (n_obs, n_rep, n_treat)
+ assert pq_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.predictions['ml_g_du_z0'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.predictions['ml_g_du_z1'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.predictions['ml_m_z'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.predictions['ml_m_d_z0'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.predictions['ml_m_d_z1'].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.predictions["ml_g_du_z0"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.predictions["ml_g_du_z1"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.predictions["ml_m_z"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.predictions["ml_m_d_z0"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.predictions["ml_m_d_z1"].shape == (n_obs, n_rep, n_treat)
- assert did_obj.predictions['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert did_obj.predictions['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert did_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.predictions["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.predictions["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.predictions['ml_g_d0_t0'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.predictions['ml_g_d0_t1'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.predictions['ml_g_d1_t0'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.predictions['ml_g_d1_t1'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.predictions["ml_g_d0_t0"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.predictions["ml_g_d0_t1"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.predictions["ml_g_d1_t0"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.predictions["ml_g_d1_t1"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.predictions['ml_g_d0'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.predictions['ml_g_d1'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.predictions['ml_pi'].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.predictions["ml_g_d0"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.predictions["ml_g_d1"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.predictions["ml_pi"].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.predictions['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.predictions['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.predictions['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.predictions["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.predictions["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
@pytest.mark.ci
def test_stored_nuisance_targets():
- assert plr_obj.nuisance_targets['ml_l'].shape == (n_obs, n_rep, n_treat)
- assert plr_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert plr_obj.nuisance_targets["ml_l"].shape == (n_obs, n_rep, n_treat)
+ assert plr_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.nuisance_targets['ml_l'].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert pliv_obj.nuisance_targets['ml_r'].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.nuisance_targets["ml_l"].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert pliv_obj.nuisance_targets["ml_r"].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.nuisance_targets['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.nuisance_targets['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert irm_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.nuisance_targets["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.nuisance_targets["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert irm_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.nuisance_targets['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.nuisance_targets['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.nuisance_targets['ml_r0'].shape == (n_obs, n_rep, n_treat)
- assert iivm_obj.nuisance_targets['ml_r1'].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.nuisance_targets["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.nuisance_targets["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.nuisance_targets["ml_r0"].shape == (n_obs, n_rep, n_treat)
+ assert iivm_obj.nuisance_targets["ml_r1"].shape == (n_obs, n_rep, n_treat)
- assert cvar_obj.nuisance_targets['ml_g'].shape == (n_obs, n_rep, n_treat)
- assert cvar_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert cvar_obj.nuisance_targets["ml_g"].shape == (n_obs, n_rep, n_treat)
+ assert cvar_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert pq_obj.nuisance_targets['ml_g'].shape == (n_obs, n_rep, n_treat)
- assert pq_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert pq_obj.nuisance_targets["ml_g"].shape == (n_obs, n_rep, n_treat)
+ assert pq_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.nuisance_targets['ml_g_du_z0'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.nuisance_targets['ml_g_du_z1'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.nuisance_targets['ml_m_z'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.nuisance_targets['ml_m_d_z0'].shape == (n_obs, n_rep, n_treat)
- assert lpq_obj.nuisance_targets['ml_m_d_z1'].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.nuisance_targets["ml_g_du_z0"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.nuisance_targets["ml_g_du_z1"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.nuisance_targets["ml_m_z"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.nuisance_targets["ml_m_d_z0"].shape == (n_obs, n_rep, n_treat)
+ assert lpq_obj.nuisance_targets["ml_m_d_z1"].shape == (n_obs, n_rep, n_treat)
- assert did_obj.nuisance_targets['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert did_obj.nuisance_targets['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert did_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.nuisance_targets["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.nuisance_targets["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert did_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.nuisance_targets['ml_g_d0_t0'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.nuisance_targets['ml_g_d0_t1'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.nuisance_targets['ml_g_d1_t0'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.nuisance_targets['ml_g_d1_t1'].shape == (n_obs, n_rep, n_treat)
- assert did_cs_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.nuisance_targets["ml_g_d0_t0"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.nuisance_targets["ml_g_d0_t1"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.nuisance_targets["ml_g_d1_t0"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.nuisance_targets["ml_g_d1_t1"].shape == (n_obs, n_rep, n_treat)
+ assert did_cs_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.nuisance_targets['ml_g_d0'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.nuisance_targets['ml_g_d1'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
- assert ssm_obj.nuisance_targets['ml_pi'].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.nuisance_targets["ml_g_d0"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.nuisance_targets["ml_g_d1"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
+ assert ssm_obj.nuisance_targets["ml_pi"].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.nuisance_targets['ml_g0'].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.nuisance_targets['ml_g1'].shape == (n_obs, n_rep, n_treat)
- assert apo_obj.nuisance_targets['ml_m'].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.nuisance_targets["ml_g0"].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.nuisance_targets["ml_g1"].shape == (n_obs, n_rep, n_treat)
+ assert apo_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
@pytest.mark.ci
def test_nuisance_loss():
- assert plr_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat)
- assert plr_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert plr_obj.nuisance_loss["ml_l"].shape == (n_rep, n_treat)
+ assert plr_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert pliv_obj.nuisance_loss['ml_l'].shape == (n_rep, n_treat)
- assert pliv_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
- assert pliv_obj.nuisance_loss['ml_r'].shape == (n_rep, n_treat)
+ assert pliv_obj.nuisance_loss["ml_l"].shape == (n_rep, n_treat)
+ assert pliv_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
+ assert pliv_obj.nuisance_loss["ml_r"].shape == (n_rep, n_treat)
- assert irm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
- assert irm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
- assert irm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert irm_obj.nuisance_loss["ml_g0"].shape == (n_rep, n_treat)
+ assert irm_obj.nuisance_loss["ml_g1"].shape == (n_rep, n_treat)
+ assert irm_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert iivm_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
- assert iivm_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
- assert iivm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
- assert iivm_obj.nuisance_loss['ml_r0'].shape == (n_rep, n_treat)
- assert iivm_obj.nuisance_loss['ml_r1'].shape == (n_rep, n_treat)
+ assert iivm_obj.nuisance_loss["ml_g0"].shape == (n_rep, n_treat)
+ assert iivm_obj.nuisance_loss["ml_g1"].shape == (n_rep, n_treat)
+ assert iivm_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
+ assert iivm_obj.nuisance_loss["ml_r0"].shape == (n_rep, n_treat)
+ assert iivm_obj.nuisance_loss["ml_r1"].shape == (n_rep, n_treat)
- assert cvar_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat)
- assert cvar_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert cvar_obj.nuisance_loss["ml_g"].shape == (n_rep, n_treat)
+ assert cvar_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert pq_obj.nuisance_loss['ml_g'].shape == (n_rep, n_treat)
- assert pq_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert pq_obj.nuisance_loss["ml_g"].shape == (n_rep, n_treat)
+ assert pq_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert lpq_obj.nuisance_loss['ml_g_du_z0'].shape == (n_rep, n_treat)
- assert lpq_obj.nuisance_loss['ml_g_du_z1'].shape == (n_rep, n_treat)
- assert lpq_obj.nuisance_loss['ml_m_z'].shape == (n_rep, n_treat)
- assert lpq_obj.nuisance_loss['ml_m_d_z0'].shape == (n_rep, n_treat)
- assert lpq_obj.nuisance_loss['ml_m_d_z1'].shape == (n_rep, n_treat)
+ assert lpq_obj.nuisance_loss["ml_g_du_z0"].shape == (n_rep, n_treat)
+ assert lpq_obj.nuisance_loss["ml_g_du_z1"].shape == (n_rep, n_treat)
+ assert lpq_obj.nuisance_loss["ml_m_z"].shape == (n_rep, n_treat)
+ assert lpq_obj.nuisance_loss["ml_m_d_z0"].shape == (n_rep, n_treat)
+ assert lpq_obj.nuisance_loss["ml_m_d_z1"].shape == (n_rep, n_treat)
- assert did_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
- assert did_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
- assert did_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert did_obj.nuisance_loss["ml_g0"].shape == (n_rep, n_treat)
+ assert did_obj.nuisance_loss["ml_g1"].shape == (n_rep, n_treat)
+ assert did_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert did_cs_obj.nuisance_loss['ml_g_d0_t0'].shape == (n_rep, n_treat)
- assert did_cs_obj.nuisance_loss['ml_g_d0_t1'].shape == (n_rep, n_treat)
- assert did_cs_obj.nuisance_loss['ml_g_d1_t0'].shape == (n_rep, n_treat)
- assert did_cs_obj.nuisance_loss['ml_g_d1_t1'].shape == (n_rep, n_treat)
- assert did_cs_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert did_cs_obj.nuisance_loss["ml_g_d0_t0"].shape == (n_rep, n_treat)
+ assert did_cs_obj.nuisance_loss["ml_g_d0_t1"].shape == (n_rep, n_treat)
+ assert did_cs_obj.nuisance_loss["ml_g_d1_t0"].shape == (n_rep, n_treat)
+ assert did_cs_obj.nuisance_loss["ml_g_d1_t1"].shape == (n_rep, n_treat)
+ assert did_cs_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
- assert ssm_obj.nuisance_loss['ml_g_d0'].shape == (n_rep, n_treat)
- assert ssm_obj.nuisance_loss['ml_g_d1'].shape == (n_rep, n_treat)
- assert ssm_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
- assert ssm_obj.nuisance_loss['ml_pi'].shape == (n_rep, n_treat)
+ assert ssm_obj.nuisance_loss["ml_g_d0"].shape == (n_rep, n_treat)
+ assert ssm_obj.nuisance_loss["ml_g_d1"].shape == (n_rep, n_treat)
+ assert ssm_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
+ assert ssm_obj.nuisance_loss["ml_pi"].shape == (n_rep, n_treat)
- assert apo_obj.nuisance_loss['ml_g0'].shape == (n_rep, n_treat)
- assert apo_obj.nuisance_loss['ml_g1'].shape == (n_rep, n_treat)
- assert apo_obj.nuisance_loss['ml_m'].shape == (n_rep, n_treat)
+ assert apo_obj.nuisance_loss["ml_g0"].shape == (n_rep, n_treat)
+ assert apo_obj.nuisance_loss["ml_g1"].shape == (n_rep, n_treat)
+ assert apo_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
def _test_sensitivity_return_types(dml_obj, n_rep, n_treat, benchmarking_set):
assert isinstance(dml_obj.sensitivity_elements, dict)
- for key in ['sigma2', 'nu2']:
+ for key in ["sigma2", "nu2"]:
assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray)
assert dml_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat)
- for key in ['psi_sigma2', 'psi_nu2', 'riesz_rep']:
+ for key in ["psi_sigma2", "psi_nu2", "riesz_rep"]:
assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray)
assert dml_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat)
@@ -428,13 +442,12 @@ def _test_sensitivity_return_types(dml_obj, n_rep, n_treat, benchmarking_set):
dml_obj.sensitivity_analysis()
assert isinstance(dml_obj.sensitivity_summary, str)
assert isinstance(dml_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure)
- benchmarks = {'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': ["test1", "test2"]}
- assert isinstance(dml_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure)
+ benchmarks = {"cf_y": [0.1, 0.2], "cf_d": [0.15, 0.2], "name": ["test1", "test2"]}
+ assert isinstance(dml_obj.sensitivity_plot(value="ci", benchmarks=benchmarks), plotly.graph_objs._figure.Figure)
assert isinstance(dml_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict)
assert isinstance(
- dml_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0),
- tuple
+ dml_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple
)
benchmark = dml_obj.sensitivity_benchmark(benchmarking_set=benchmarking_set)
assert isinstance(benchmark, pd.DataFrame)
@@ -444,7 +457,6 @@ def _test_sensitivity_return_types(dml_obj, n_rep, n_treat, benchmarking_set):
@pytest.mark.ci
def test_sensitivity():
-
# PLR
_test_sensitivity_return_types(plr_obj, n_rep, n_treat, benchmarking_set=["X1"])
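The return-type tests above encode the package's shape conventions: per-learner nuisance predictions, targets, and score elements are `(n_obs, n_rep, n_treat)` arrays, while `coef`, `se`, `pval`, and `t_stat` are `(n_treat,)` arrays. A minimal usage sketch of those conventions, using only objects that appear in the file above:

```python
# Fit a small PLR model and verify the stored-prediction and estimate shapes
# asserted throughout test_return_types.py.
from sklearn.linear_model import Lasso
from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018

dml_data = make_plr_CCDDHNR2018(n_obs=100)
obj = DoubleMLPLR(dml_data, Lasso(), Lasso(), n_folds=3, n_rep=2)
obj.fit(store_predictions=True)

assert obj.coef.shape == (1,)                        # one treatment variable "d"
assert obj.predictions["ml_l"].shape == (100, 2, 1)  # (n_obs, n_rep, n_treat)
```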
diff --git a/doubleml/tests/test_scores.py b/doubleml/tests/test_scores.py
index 5b5c68edb..c3281702d 100644
--- a/doubleml/tests/test_scores.py
+++ b/doubleml/tests/test_scores.py
@@ -1,11 +1,10 @@
-import pytest
import numpy as np
-
-from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV
-from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data
-
+import pytest
from sklearn.linear_model import Lasso, LogisticRegression
+from doubleml import DoubleMLIIVM, DoubleMLIRM, DoubleMLPLIV, DoubleMLPLR
+from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018
+
np.random.seed(3141)
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
dml_data_pliv = make_pliv_CHS2015(n_obs=100, dim_z=1)
@@ -14,7 +13,7 @@
dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso())
dml_plr.fit()
-dml_plr_iv_type = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), score='IV-type')
+dml_plr_iv_type = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), score="IV-type")
dml_plr_iv_type.fit()
dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso())
dml_pliv.fit()
@@ -25,26 +24,34 @@
# fit models with callable scores
plr_score = dml_plr._score_elements
-dml_plr_callable_score = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(),
- score=plr_score, draw_sample_splitting=False)
+dml_plr_callable_score = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), score=plr_score, draw_sample_splitting=False)
dml_plr_callable_score.set_sample_splitting(dml_plr.smpls)
dml_plr_callable_score.fit(store_predictions=True)
plr_iv_type_score = dml_plr_iv_type._score_elements
-dml_plr_iv_type_callable_score = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(),
- score=plr_iv_type_score, draw_sample_splitting=False)
+dml_plr_iv_type_callable_score = DoubleMLPLR(
+ dml_data_plr, Lasso(), Lasso(), Lasso(), score=plr_iv_type_score, draw_sample_splitting=False
+)
dml_plr_iv_type_callable_score.set_sample_splitting(dml_plr_iv_type.smpls)
dml_plr_iv_type_callable_score.fit(store_predictions=True)
irm_score = dml_irm._score_elements
-dml_irm_callable_score = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(),
- score=irm_score, draw_sample_splitting=False, normalize_ipw=False)
+dml_irm_callable_score = DoubleMLIRM(
+ dml_data_irm, Lasso(), LogisticRegression(), score=irm_score, draw_sample_splitting=False, normalize_ipw=False
+)
dml_irm_callable_score.set_sample_splitting(dml_irm.smpls)
dml_irm_callable_score.fit(store_predictions=True)
iivm_score = dml_iivm._score_elements
-dml_iivm_callable_score = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(),
- score=iivm_score, draw_sample_splitting=False, normalize_ipw=False)
+dml_iivm_callable_score = DoubleMLIIVM(
+ dml_data_iivm,
+ Lasso(),
+ LogisticRegression(),
+ LogisticRegression(),
+ score=iivm_score,
+ draw_sample_splitting=False,
+ normalize_ipw=False,
+)
dml_iivm_callable_score.set_sample_splitting(dml_iivm.smpls)
dml_iivm_callable_score.fit(store_predictions=True)
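The callable-score objects fitted above all follow the same pattern: a function with the `(y, d, l_hat, m_hat, g_hat, smpls) -> (psi_a, psi_b)` signature is passed as `score=`. A sketch of a user-defined score in that style, mirroring the partialling-out elements used elsewhere in this patch (illustrative only):

```python
import numpy as np
from sklearn.linear_model import Lasso
from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018

def my_partialling_out_score(y, d, l_hat, m_hat, g_hat, smpls):
    # Same signature as the callable scores exercised in this file.
    u_hat = y - l_hat  # residual of the outcome regression
    v_hat = d - m_hat  # residual of the treatment regression
    psi_a = -np.multiply(v_hat, v_hat)
    psi_b = np.multiply(v_hat, u_hat)
    return psi_a, psi_b

dml_data = make_plr_CCDDHNR2018(n_obs=100)
dml_plr_custom = DoubleMLPLR(dml_data, Lasso(), Lasso(), score=my_partialling_out_score)
dml_plr_custom.fit()
```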
@@ -68,181 +75,135 @@ def non_orth_score_w_l(y, d, l_hat, m_hat, g_hat, smpls):
return psi_a, psi_b
-dml_plr_non_orth_score_w_g = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(),
- score=non_orth_score_w_g)
+dml_plr_non_orth_score_w_g = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), score=non_orth_score_w_g)
dml_plr_non_orth_score_w_g.fit(store_predictions=True)
-dml_plr_non_orth_score_w_l = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(),
- score=non_orth_score_w_l)
+dml_plr_non_orth_score_w_l = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), score=non_orth_score_w_l)
dml_plr_non_orth_score_w_l.fit(store_predictions=True)
@pytest.mark.ci
-@pytest.mark.parametrize('dml_obj',
- [dml_plr, dml_plr_iv_type, dml_pliv, dml_irm, dml_iivm])
+@pytest.mark.parametrize("dml_obj", [dml_plr, dml_plr_iv_type, dml_pliv, dml_irm, dml_iivm])
def test_linear_score(dml_obj):
- assert np.allclose(dml_obj.psi,
- dml_obj.psi_elements['psi_a'] * dml_obj.coef + dml_obj.psi_elements['psi_b'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_obj.psi, dml_obj.psi_elements["psi_a"] * dml_obj.coef + dml_obj.psi_elements["psi_b"], rtol=1e-9, atol=1e-4
+ )
@pytest.mark.ci
def test_plr_callable_vs_str_score():
- assert np.allclose(dml_plr.psi,
- dml_plr_callable_score.psi,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr.coef,
- dml_plr_callable_score.coef,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr.psi, dml_plr_callable_score.psi, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr.coef, dml_plr_callable_score.coef, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_plr_callable_vs_pred_export():
preds = dml_plr_callable_score.predictions
- l_hat = preds['ml_l'].squeeze()
- m_hat = preds['ml_m'].squeeze()
+ l_hat = preds["ml_l"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
g_hat = None
- psi_a, psi_b = plr_score(dml_data_plr.y, dml_data_plr.d,
- l_hat, m_hat, g_hat,
- dml_plr_callable_score.smpls[0])
- assert np.allclose(dml_plr.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ psi_a, psi_b = plr_score(dml_data_plr.y, dml_data_plr.d, l_hat, m_hat, g_hat, dml_plr_callable_score.smpls[0])
+ assert np.allclose(dml_plr.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_plr_iv_type_callable_vs_str_score():
- assert np.allclose(dml_plr_iv_type.psi,
- dml_plr_iv_type_callable_score.psi,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_iv_type.coef,
- dml_plr_iv_type_callable_score.coef,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_iv_type.psi, dml_plr_iv_type_callable_score.psi, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_iv_type.coef, dml_plr_iv_type_callable_score.coef, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_plr_iv_type_callable_vs_pred_export():
preds = dml_plr_iv_type_callable_score.predictions
- l_hat = preds['ml_l'].squeeze()
- m_hat = preds['ml_m'].squeeze()
- g_hat = preds['ml_g'].squeeze()
- psi_a, psi_b = plr_iv_type_score(dml_data_plr.y, dml_data_plr.d,
- l_hat, m_hat, g_hat,
- dml_plr_iv_type_callable_score.smpls[0])
- assert np.allclose(dml_plr_iv_type.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_iv_type.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ l_hat = preds["ml_l"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
+ g_hat = preds["ml_g"].squeeze()
+ psi_a, psi_b = plr_iv_type_score(
+ dml_data_plr.y, dml_data_plr.d, l_hat, m_hat, g_hat, dml_plr_iv_type_callable_score.smpls[0]
+ )
+ assert np.allclose(dml_plr_iv_type.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_iv_type.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_plr_non_orth_score_w_g_callable_vs_pred_export():
preds = dml_plr_non_orth_score_w_g.predictions
- l_hat = preds['ml_l'].squeeze()
- m_hat = preds['ml_m'].squeeze()
- g_hat = preds['ml_g'].squeeze()
- psi_a, psi_b = non_orth_score_w_g(dml_data_plr.y, dml_data_plr.d,
- l_hat, m_hat, g_hat,
- dml_plr_non_orth_score_w_g.smpls[0])
- assert np.allclose(dml_plr_non_orth_score_w_g.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_non_orth_score_w_g.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ l_hat = preds["ml_l"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
+ g_hat = preds["ml_g"].squeeze()
+ psi_a, psi_b = non_orth_score_w_g(dml_data_plr.y, dml_data_plr.d, l_hat, m_hat, g_hat, dml_plr_non_orth_score_w_g.smpls[0])
+ assert np.allclose(dml_plr_non_orth_score_w_g.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_non_orth_score_w_g.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_plr_non_orth_score_w_l_callable_vs_pred_export():
preds = dml_plr_non_orth_score_w_l.predictions
- l_hat = preds['ml_l'].squeeze()
- m_hat = preds['ml_m'].squeeze()
+ l_hat = preds["ml_l"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
g_hat = None
- psi_a, psi_b = non_orth_score_w_l(dml_data_plr.y, dml_data_plr.d,
- l_hat, m_hat, g_hat,
- dml_plr_non_orth_score_w_l.smpls[0])
- assert np.allclose(dml_plr_non_orth_score_w_l.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_plr_non_orth_score_w_l.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ psi_a, psi_b = non_orth_score_w_l(dml_data_plr.y, dml_data_plr.d, l_hat, m_hat, g_hat, dml_plr_non_orth_score_w_l.smpls[0])
+ assert np.allclose(dml_plr_non_orth_score_w_l.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_plr_non_orth_score_w_l.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_irm_callable_vs_str_score():
- assert np.allclose(dml_irm.psi,
- dml_irm_callable_score.psi,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm.coef,
- dml_irm_callable_score.coef,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_irm.psi, dml_irm_callable_score.psi, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_irm.coef, dml_irm_callable_score.coef, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_irm_callable_vs_pred_export():
preds = dml_irm_callable_score.predictions
- g_hat0 = preds['ml_g0'].squeeze()
- g_hat1 = preds['ml_g1'].squeeze()
- m_hat = preds['ml_m'].squeeze()
- psi_a, psi_b = irm_score(dml_data_irm.y, dml_data_irm.d,
- g_hat0, g_hat1, m_hat,
- dml_irm_callable_score.smpls[0])
- assert np.allclose(dml_irm.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_irm.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ g_hat0 = preds["ml_g0"].squeeze()
+ g_hat1 = preds["ml_g1"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
+ psi_a, psi_b = irm_score(dml_data_irm.y, dml_data_irm.d, g_hat0, g_hat1, m_hat, dml_irm_callable_score.smpls[0])
+ assert np.allclose(dml_irm.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_irm.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_iivm_callable_vs_str_score():
- assert np.allclose(dml_iivm.psi,
- dml_iivm_callable_score.psi,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_iivm.coef,
- dml_iivm_callable_score.coef,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_iivm.psi, dml_iivm_callable_score.psi, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_iivm.coef, dml_iivm_callable_score.coef, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_iivm_callable_vs_pred_export():
preds = dml_iivm_callable_score.predictions
- g_hat0 = preds['ml_g0'].squeeze()
- g_hat1 = preds['ml_g1'].squeeze()
- m_hat = preds['ml_m'].squeeze()
- r_hat0 = preds['ml_r0'].squeeze()
- r_hat1 = preds['ml_r1'].squeeze()
- psi_a, psi_b = iivm_score(dml_data_iivm.y, dml_data_iivm.z.squeeze(), dml_data_iivm.d,
- g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
- dml_iivm_callable_score.smpls[0])
- assert np.allclose(dml_iivm.psi_elements['psi_a'].squeeze(),
- psi_a,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_iivm.psi_elements['psi_b'].squeeze(),
- psi_b,
- rtol=1e-9, atol=1e-4)
+ g_hat0 = preds["ml_g0"].squeeze()
+ g_hat1 = preds["ml_g1"].squeeze()
+ m_hat = preds["ml_m"].squeeze()
+ r_hat0 = preds["ml_r0"].squeeze()
+ r_hat1 = preds["ml_r1"].squeeze()
+ psi_a, psi_b = iivm_score(
+ dml_data_iivm.y,
+ dml_data_iivm.z.squeeze(),
+ dml_data_iivm.d,
+ g_hat0,
+ g_hat1,
+ m_hat,
+ r_hat0,
+ r_hat1,
+ dml_iivm_callable_score.smpls[0],
+ )
+ assert np.allclose(dml_iivm.psi_elements["psi_a"].squeeze(), psi_a, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_iivm.psi_elements["psi_b"].squeeze(), psi_b, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_pliv_callable_vs_str_score():
pliv_score = dml_pliv._score_elements
- dml_pliv_callable_score = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso(),
- score=pliv_score, draw_sample_splitting=False)
+ dml_pliv_callable_score = DoubleMLPLIV(
+ dml_data_pliv, Lasso(), Lasso(), Lasso(), score=pliv_score, draw_sample_splitting=False
+ )
dml_pliv_callable_score.set_sample_splitting(dml_pliv.smpls)
dml_pliv_callable_score.fit()
- assert np.allclose(dml_pliv.psi,
- dml_pliv_callable_score.psi,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_pliv.coef,
- dml_pliv_callable_score.coef,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_pliv.psi, dml_pliv_callable_score.psi, rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_pliv.coef, dml_pliv_callable_score.coef, rtol=1e-9, atol=1e-4)
@pytest.mark.ci
@@ -251,20 +212,17 @@ def test_pliv_callable_not_implemented():
dml_data_pliv_2z = make_pliv_CHS2015(n_obs=100, dim_z=2)
pliv_score = dml_pliv._score_elements
- dml_pliv_callable_score = DoubleMLPLIV._partialX(dml_data_pliv_2z, Lasso(), Lasso(), Lasso(),
- score=pliv_score)
- msg = 'Callable score not implemented for DoubleMLPLIV.partialX with several instruments.'
+ dml_pliv_callable_score = DoubleMLPLIV._partialX(dml_data_pliv_2z, Lasso(), Lasso(), Lasso(), score=pliv_score)
+ msg = "Callable score not implemented for DoubleMLPLIV.partialX with several instruments."
with pytest.raises(NotImplementedError, match=msg):
dml_pliv_callable_score.fit()
- dml_pliv_callable_score = DoubleMLPLIV._partialZ(dml_data_pliv_2z, Lasso(),
- score=pliv_score)
- msg = 'Callable score not implemented for DoubleMLPLIV.partialZ.'
+ dml_pliv_callable_score = DoubleMLPLIV._partialZ(dml_data_pliv_2z, Lasso(), score=pliv_score)
+ msg = "Callable score not implemented for DoubleMLPLIV.partialZ."
with pytest.raises(NotImplementedError, match=msg):
dml_pliv_callable_score.fit()
- dml_pliv_callable_score = DoubleMLPLIV._partialXZ(dml_data_pliv_2z, Lasso(), Lasso(), Lasso(),
- score=pliv_score)
- msg = 'Callable score not implemented for DoubleMLPLIV.partialXZ.'
+ dml_pliv_callable_score = DoubleMLPLIV._partialXZ(dml_data_pliv_2z, Lasso(), Lasso(), Lasso(), score=pliv_score)
+ msg = "Callable score not implemented for DoubleMLPLIV.partialXZ."
with pytest.raises(NotImplementedError, match=msg):
dml_pliv_callable_score.fit()
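The tests above fix the callable-score contract used throughout this file: a function with signature score(y, d, l_hat, m_hat, g_hat, smpls) returning the pair (psi_a, psi_b) of the linear score psi = psi_a * theta + psi_b. A minimal, self-contained sketch of that usage, assuming the standard 'partialling out' moments from the DoubleML user guide (the learner and data generator are illustrative, not the ones from these tests):

import numpy as np
from sklearn.linear_model import Lasso
from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018

def my_plr_score(y, d, l_hat, m_hat, g_hat, smpls):
    # residuals of the two nuisance regressions; g_hat and smpls are unused here
    u_hat = y - l_hat
    v_hat = d - m_hat
    psi_a = -np.multiply(v_hat, v_hat)
    psi_b = np.multiply(v_hat, u_hat)
    return psi_a, psi_b

dml_plr = DoubleMLPLR(make_plr_CCDDHNR2018(n_obs=100), Lasso(), Lasso(), score=my_plr_score)
dml_plr.fit()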
diff --git a/doubleml/tests/test_sensitivity.py b/doubleml/tests/test_sensitivity.py
index 9c9ca9f36..70001dfb2 100644
--- a/doubleml/tests/test_sensitivity.py
+++ b/doubleml/tests/test_sensitivity.py
@@ -1,14 +1,14 @@
-import pytest
-import numpy as np
import copy
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
import doubleml as dml
-from doubleml import DoubleMLIRM, DoubleMLData
+from doubleml import DoubleMLData, DoubleMLIRM
from doubleml.datasets import make_irm_data
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_manual, \
- doubleml_sensitivity_benchmark_manual
+from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual, doubleml_sensitivity_manual
@pytest.fixture(scope="module", params=[["X1"], ["X2"], ["X3"]])
@@ -16,97 +16,89 @@ def benchmarking_set(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.03, 0.3])
+@pytest.fixture(scope="module", params=[0.03, 0.3])
def cf_y(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.03, 0.3])
+@pytest.fixture(scope="module", params=[0.03, 0.3])
def cf_d(request):
return request.param
-@pytest.fixture(scope='module',
- params=[-0.5, 0.0, 1.0])
+@pytest.fixture(scope="module", params=[-0.5, 0.0, 1.0])
def rho(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.8, 0.95])
+@pytest.fixture(scope="module", params=[0.8, 0.95])
def level(request):
return request.param
@pytest.fixture(scope="module")
def dml_sensitivity_multitreat_fixture(generate_data_bivariate, n_rep, cf_y, cf_d, rho, level):
-
# collect data
data = generate_data_bivariate
- x_cols = data.columns[data.columns.str.startswith('X')].tolist()
- d_cols = data.columns[data.columns.str.startswith('d')].tolist()
+ x_cols = data.columns[data.columns.str.startswith("X")].tolist()
+ d_cols = data.columns[data.columns.str.startswith("d")].tolist()
# Set machine learning methods for m & g
ml_l = LinearRegression()
ml_m = LinearRegression()
np.random.seed(3141)
- obj_dml_data = dml.DoubleMLData(data, 'y', d_cols, x_cols)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
- ml_l,
- ml_m,
- n_folds=5,
- n_rep=n_rep,
- score='partialling out')
+ obj_dml_data = dml.DoubleMLData(data, "y", d_cols, x_cols)
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m, n_folds=5, n_rep=n_rep, score="partialling out")
dml_plr_obj.fit()
dml_plr_obj.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level, null_hypothesis=0.0)
- res_manual = doubleml_sensitivity_manual(sensitivity_elements=dml_plr_obj.sensitivity_elements,
- all_coefs=dml_plr_obj.all_coef,
- psi=dml_plr_obj.psi,
- psi_deriv=dml_plr_obj.psi_deriv,
- cf_y=cf_y,
- cf_d=cf_d,
- rho=rho,
- level=level)
+ res_manual = doubleml_sensitivity_manual(
+ sensitivity_elements=dml_plr_obj.sensitivity_elements,
+ all_coefs=dml_plr_obj.all_coef,
+ psi=dml_plr_obj.psi,
+ psi_deriv=dml_plr_obj.psi_deriv,
+ cf_y=cf_y,
+ cf_d=cf_d,
+ rho=rho,
+ level=level,
+ )
benchmark = dml_plr_obj.sensitivity_benchmark(benchmarking_set=["X1"])
- benchmark_manual = doubleml_sensitivity_benchmark_manual(dml_obj=dml_plr_obj,
- benchmarking_set=["X1"])
- res_dict = {'sensitivity_params': dml_plr_obj.sensitivity_params,
- 'sensitivity_params_manual': res_manual,
- 'benchmark': benchmark,
- 'benchmark_manual': benchmark_manual,
- 'd_cols': d_cols,
- }
+ benchmark_manual = doubleml_sensitivity_benchmark_manual(dml_obj=dml_plr_obj, benchmarking_set=["X1"])
+ res_dict = {
+ "sensitivity_params": dml_plr_obj.sensitivity_params,
+ "sensitivity_params_manual": res_manual,
+ "benchmark": benchmark,
+ "benchmark_manual": benchmark_manual,
+ "d_cols": d_cols,
+ }
return res_dict
@pytest.mark.ci
def test_dml_sensitivity_params(dml_sensitivity_multitreat_fixture):
- sensitivity_param_names = ['theta', 'se', 'ci']
+ sensitivity_param_names = ["theta", "se", "ci"]
for sensitivity_param in sensitivity_param_names:
- for bound in ['lower', 'upper']:
- assert np.allclose(dml_sensitivity_multitreat_fixture['sensitivity_params'][sensitivity_param][bound],
- dml_sensitivity_multitreat_fixture['sensitivity_params_manual'][sensitivity_param][bound])
+ for bound in ["lower", "upper"]:
+ assert np.allclose(
+ dml_sensitivity_multitreat_fixture["sensitivity_params"][sensitivity_param][bound],
+ dml_sensitivity_multitreat_fixture["sensitivity_params_manual"][sensitivity_param][bound],
+ )
@pytest.mark.ci
def test_dml_sensitivity_benchmark(dml_sensitivity_multitreat_fixture):
expected_columns = ["cf_y", "cf_d", "rho", "delta_theta"]
- assert all(dml_sensitivity_multitreat_fixture['benchmark'].columns == expected_columns)
- assert all(dml_sensitivity_multitreat_fixture['benchmark'].index ==
- dml_sensitivity_multitreat_fixture['d_cols'])
- assert dml_sensitivity_multitreat_fixture['benchmark'].equals(dml_sensitivity_multitreat_fixture['benchmark_manual'])
+ assert all(dml_sensitivity_multitreat_fixture["benchmark"].columns == expected_columns)
+ assert all(dml_sensitivity_multitreat_fixture["benchmark"].index == dml_sensitivity_multitreat_fixture["d_cols"])
+ assert dml_sensitivity_multitreat_fixture["benchmark"].equals(dml_sensitivity_multitreat_fixture["benchmark_manual"])
@pytest.fixture(scope="module")
@@ -120,11 +112,9 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
np.random.seed(3141)
dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
x_list_long = copy.deepcopy(dml_data.x_cols)
- dml_int = DoubleMLIRM(dml_data,
- ml_m=classifier_class(random_state=random_state),
- ml_g=regressor_class(),
- n_folds=2,
- n_rep=n_rep)
+ dml_int = DoubleMLIRM(
+ dml_data, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(), n_folds=2, n_rep=n_rep
+ )
dml_int.fit(store_predictions=True)
dml_int.sensitivity_analysis()
dml_ext = copy.deepcopy(dml_int)
@@ -133,30 +123,29 @@ def test_dml_benchmark_fixture(benchmarking_set, n_rep):
np.random.seed(3141)
dml_data_short = DoubleMLData.from_arrays(x=x, y=y, d=d)
dml_data_short.x_cols = [x for x in x_list_long if x not in benchmarking_set]
- dml_short = DoubleMLIRM(dml_data_short,
- ml_m=classifier_class(random_state=random_state),
- ml_g=regressor_class(),
- n_folds=2,
- n_rep=n_rep)
+ dml_short = DoubleMLIRM(
+ dml_data_short, ml_m=classifier_class(random_state=random_state), ml_g=regressor_class(), n_folds=2, n_rep=n_rep
+ )
dml_short.fit(store_predictions=True)
- fit_args = {"external_predictions": {"d": {"ml_m": dml_short.predictions["ml_m"][:, :, 0],
- "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
- "ml_g1": dml_short.predictions["ml_g1"][:, :, 0],
- }
- },
- }
+ fit_args = {
+ "external_predictions": {
+ "d": {
+ "ml_m": dml_short.predictions["ml_m"][:, :, 0],
+ "ml_g0": dml_short.predictions["ml_g0"][:, :, 0],
+ "ml_g1": dml_short.predictions["ml_g1"][:, :, 0],
+ }
+ },
+ }
dml_ext.sensitivity_analysis()
df_bm_ext = dml_ext.sensitivity_benchmark(benchmarking_set=benchmarking_set, fit_args=fit_args)
- res_dict = {"default_benchmark": df_bm,
- "external_benchmark": df_bm_ext}
+ res_dict = {"default_benchmark": df_bm, "external_benchmark": df_bm_ext}
return res_dict
@pytest.mark.ci
def test_dml_sensitivity_external_predictions(test_dml_benchmark_fixture):
- assert np.allclose(test_dml_benchmark_fixture["default_benchmark"],
- test_dml_benchmark_fixture["external_benchmark"],
- rtol=1e-9,
- atol=1e-4)
+ assert np.allclose(
+ test_dml_benchmark_fixture["default_benchmark"], test_dml_benchmark_fixture["external_benchmark"], rtol=1e-9, atol=1e-4
+ )
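For orientation, the sensitivity workflow this test file exercises, condensed into a self-contained sketch (parameter values mirror the fixture grids and are illustrative only):

import numpy as np
from sklearn.linear_model import LinearRegression
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018

np.random.seed(3141)
obj = dml.DoubleMLPLR(make_plr_CCDDHNR2018(n_obs=200), LinearRegression(), LinearRegression(), n_folds=5)
obj.fit()
obj.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_hypothesis=0.0)
theta_bounds = obj.sensitivity_params["theta"]               # dict with "lower"/"upper" arrays
df_bm = obj.sensitivity_benchmark(benchmarking_set=["X1"])   # columns: cf_y, cf_d, rho, delta_theta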
diff --git a/doubleml/tests/test_sensitivity_cluster.py b/doubleml/tests/test_sensitivity_cluster.py
index 9485e7373..65ec0d644 100644
--- a/doubleml/tests/test_sensitivity_cluster.py
+++ b/doubleml/tests/test_sensitivity_cluster.py
@@ -1,11 +1,12 @@
-import numpy as np
-import pytest
import math
+import numpy as np
+import pytest
from sklearn.linear_model import LinearRegression
import doubleml as dml
from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021
+
from ._utils_doubleml_sensitivity_manual import doubleml_sensitivity_benchmark_manual
np.random.seed(1234)
@@ -15,27 +16,30 @@
dim_x = 10 # dimension of x
-(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type='array')
+(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, return_type="array")
obj_dml_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
-(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x,
- omega_X=np.array([0.25, 0]),
- omega_epsilon=np.array([0.25, 0]),
- omega_v=np.array([0.25, 0]),
- omega_V=np.array([0.25, 0]),
- return_type='array')
+(x, y, d, cluster_vars, z) = make_pliv_multiway_cluster_CKMS2021(
+ N,
+ M,
+ dim_x,
+ omega_X=np.array([0.25, 0]),
+ omega_epsilon=np.array([0.25, 0]),
+ omega_v=np.array([0.25, 0]),
+ omega_V=np.array([0.25, 0]),
+ return_type="array",
+)
obj_dml_oneway_cluster_data = dml.DoubleMLClusterData.from_arrays(x, y, d, cluster_vars)
# only the first cluster variable is relevant with the weight setting above
-obj_dml_oneway_cluster_data.cluster_cols = 'cluster_var1'
+obj_dml_oneway_cluster_data.cluster_cols = "cluster_var1"
-@pytest.fixture(scope='module',
- params=['IV-type', 'partialling out'])
+@pytest.fixture(scope="module", params=["IV-type", "partialling out"])
def score(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_plr_multiway_cluster_sensitivity_rho0(score):
n_folds = 3
cf_y = 0.03
@@ -48,30 +52,22 @@ def dml_plr_multiway_cluster_sensitivity_rho0(score):
ml_g = LinearRegression()
np.random.seed(3141)
- if score == 'partialling out':
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score=score)
+ if score == "partialling out":
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data, ml_l, ml_m, n_folds=n_folds, score=score)
else:
- assert score == 'IV-type'
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data,
- ml_l, ml_m, ml_g,
- n_folds=n_folds,
- score=score)
+ assert score == "IV-type"
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data, ml_l, ml_m, ml_g, n_folds=n_folds, score=score)
dml_plr_obj.fit()
- dml_plr_obj.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d,
- rho=0.0, level=level, null_hypothesis=0.0)
+ dml_plr_obj.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=0.0, level=level, null_hypothesis=0.0)
benchmark = dml_plr_obj.sensitivity_benchmark(benchmarking_set=["X1"])
- benchmark_manual = doubleml_sensitivity_benchmark_manual(dml_obj=dml_plr_obj,
- benchmarking_set=["X1"])
+ benchmark_manual = doubleml_sensitivity_benchmark_manual(dml_obj=dml_plr_obj, benchmarking_set=["X1"])
res_dict = {
- 'coef': dml_plr_obj.coef,
- 'se': dml_plr_obj.se,
- 'sensitivity_params': dml_plr_obj.sensitivity_params,
- 'benchmark': benchmark,
- 'benchmark_manual': benchmark_manual
+ "coef": dml_plr_obj.coef,
+ "se": dml_plr_obj.se,
+ "sensitivity_params": dml_plr_obj.sensitivity_params,
+ "benchmark": benchmark,
+ "benchmark_manual": benchmark_manual,
}
return res_dict
@@ -79,24 +75,31 @@ def dml_plr_multiway_cluster_sensitivity_rho0(score):
@pytest.mark.ci
def test_dml_plr_multiway_cluster_sensitivity_coef(dml_plr_multiway_cluster_sensitivity_rho0):
- assert math.isclose(dml_plr_multiway_cluster_sensitivity_rho0['coef'][0],
- dml_plr_multiway_cluster_sensitivity_rho0['sensitivity_params']['theta']['lower'][0],
- rel_tol=1e-9, abs_tol=1e-4)
- assert math.isclose(dml_plr_multiway_cluster_sensitivity_rho0['coef'][0],
- dml_plr_multiway_cluster_sensitivity_rho0['sensitivity_params']['theta']['upper'][0],
- rel_tol=1e-9, abs_tol=1e-4)
+ assert math.isclose(
+ dml_plr_multiway_cluster_sensitivity_rho0["coef"][0],
+ dml_plr_multiway_cluster_sensitivity_rho0["sensitivity_params"]["theta"]["lower"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
+ assert math.isclose(
+ dml_plr_multiway_cluster_sensitivity_rho0["coef"][0],
+ dml_plr_multiway_cluster_sensitivity_rho0["sensitivity_params"]["theta"]["upper"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-4,
+ )
@pytest.mark.ci
def test_dml_sensitivity_benchmark(dml_plr_multiway_cluster_sensitivity_rho0):
expected_columns = ["cf_y", "cf_d", "rho", "delta_theta"]
- assert all(dml_plr_multiway_cluster_sensitivity_rho0['benchmark'].columns == expected_columns)
- assert all(dml_plr_multiway_cluster_sensitivity_rho0['benchmark'].index == ["d"])
- assert dml_plr_multiway_cluster_sensitivity_rho0['benchmark'].equals(
- dml_plr_multiway_cluster_sensitivity_rho0['benchmark_manual'])
+ assert all(dml_plr_multiway_cluster_sensitivity_rho0["benchmark"].columns == expected_columns)
+ assert all(dml_plr_multiway_cluster_sensitivity_rho0["benchmark"].index == ["d"])
+ assert dml_plr_multiway_cluster_sensitivity_rho0["benchmark"].equals(
+ dml_plr_multiway_cluster_sensitivity_rho0["benchmark_manual"]
+ )
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_plr_multiway_cluster_sensitivity_rho0_se():
n_folds = 3
cf_y = 0.03
@@ -108,18 +111,12 @@ def dml_plr_multiway_cluster_sensitivity_rho0_se():
ml_m = LinearRegression()
np.random.seed(3141)
- dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data,
- ml_l, ml_m,
- n_folds=n_folds,
- score='partialling out')
+ dml_plr_obj = dml.DoubleMLPLR(obj_dml_cluster_data, ml_l, ml_m, n_folds=n_folds, score="partialling out")
dml_plr_obj.fit()
- dml_plr_obj.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d,
- rho=0.0, level=level, null_hypothesis=0.0)
+ dml_plr_obj.sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=0.0, level=level, null_hypothesis=0.0)
- res_dict = {'coef': dml_plr_obj.coef,
- 'se': dml_plr_obj.se,
- 'sensitivity_params': dml_plr_obj.sensitivity_params}
+ res_dict = {"coef": dml_plr_obj.coef, "se": dml_plr_obj.se, "sensitivity_params": dml_plr_obj.sensitivity_params}
return res_dict
@@ -127,9 +124,15 @@ def dml_plr_multiway_cluster_sensitivity_rho0_se():
# only valid for 'partialling out'; this might have slightly less precision in the calculations
@pytest.mark.ci
def test_dml_pliv_multiway_cluster_sensitivity_se(dml_plr_multiway_cluster_sensitivity_rho0_se):
- assert math.isclose(dml_plr_multiway_cluster_sensitivity_rho0_se['se'][0],
- dml_plr_multiway_cluster_sensitivity_rho0_se['sensitivity_params']['se']['lower'][0],
- rel_tol=1e-9, abs_tol=1e-3)
- assert math.isclose(dml_plr_multiway_cluster_sensitivity_rho0_se['se'][0],
- dml_plr_multiway_cluster_sensitivity_rho0_se['sensitivity_params']['se']['upper'][0],
- rel_tol=1e-9, abs_tol=1e-3)
+ assert math.isclose(
+ dml_plr_multiway_cluster_sensitivity_rho0_se["se"][0],
+ dml_plr_multiway_cluster_sensitivity_rho0_se["sensitivity_params"]["se"]["lower"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-3,
+ )
+ assert math.isclose(
+ dml_plr_multiway_cluster_sensitivity_rho0_se["se"][0],
+ dml_plr_multiway_cluster_sensitivity_rho0_se["sensitivity_params"]["se"]["upper"][0],
+ rel_tol=1e-9,
+ abs_tol=1e-3,
+ )
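The invariant pinned down above, stated once in code: with rho=0.0 the bias bound vanishes, so the sensitivity bounds for theta coincide with the point estimate (the se bounds agree too, but only up to the looser abs_tol=1e-3 used above). A sketch reusing the module-level obj_dml_cluster_data and imports from this file:

obj = dml.DoubleMLPLR(obj_dml_cluster_data, LinearRegression(), LinearRegression(), n_folds=3)
obj.fit()
obj.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=0.0, level=0.95)
for bound in ("lower", "upper"):
    assert math.isclose(obj.coef[0], obj.sensitivity_params["theta"][bound][0], rel_tol=1e-9, abs_tol=1e-4)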
diff --git a/doubleml/tests/test_set_ml_nuisance_params.py b/doubleml/tests/test_set_ml_nuisance_params.py
index bab5f5c7a..a189b184b 100644
--- a/doubleml/tests/test_set_ml_nuisance_params.py
+++ b/doubleml/tests/test_set_ml_nuisance_params.py
@@ -1,16 +1,15 @@
-import pytest
import numpy as np
+import pytest
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLCVAR, DoubleMLPQ, DoubleMLLPQ
-from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data
-
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from doubleml import DoubleMLCVAR, DoubleMLIIVM, DoubleMLIRM, DoubleMLLPQ, DoubleMLPLIV, DoubleMLPLR, DoubleMLPQ
+from doubleml.datasets import make_iivm_data, make_irm_data, make_pliv_CHS2015, make_plr_CCDDHNR2018
# set default and test values
n_est_default = 100
n_est_test = 5
n_folds = 2
-test_values = [[{'n_estimators': 5}, {'n_estimators': 5}]]
+test_values = [[{"n_estimators": 5}, {"n_estimators": 5}]]
np.random.seed(3141)
dml_data_plr = make_plr_CCDDHNR2018(n_obs=100)
@@ -23,18 +22,16 @@
# linear models
dml_plr = DoubleMLPLR(dml_data_plr, reg_learner, reg_learner, n_folds=n_folds)
-dml_pliv = DoubleMLPLIV(dml_data_pliv, reg_learner, reg_learner,
- reg_learner, n_folds=n_folds)
+dml_pliv = DoubleMLPLIV(dml_data_pliv, reg_learner, reg_learner, reg_learner, n_folds=n_folds)
dml_irm = DoubleMLIRM(dml_data_irm, reg_learner, class_learner, n_folds=n_folds)
-dml_iivm = DoubleMLIIVM(dml_data_iivm, reg_learner, class_learner,
- class_learner, n_folds=n_folds)
+dml_iivm = DoubleMLIIVM(dml_data_iivm, reg_learner, class_learner, class_learner, n_folds=n_folds)
dml_cvar = DoubleMLCVAR(dml_data_irm, ml_g=reg_learner, ml_m=class_learner, n_folds=n_folds)
-dml_plr.set_ml_nuisance_params('ml_l', 'd', {'n_estimators': n_est_test})
-dml_pliv.set_ml_nuisance_params('ml_l', 'd', {'n_estimators': n_est_test})
-dml_irm.set_ml_nuisance_params('ml_g0', 'd', {'n_estimators': n_est_test})
-dml_iivm.set_ml_nuisance_params('ml_g0', 'd', {'n_estimators': n_est_test})
-dml_cvar.set_ml_nuisance_params('ml_g', 'd', {'n_estimators': n_est_test})
+dml_plr.set_ml_nuisance_params("ml_l", "d", {"n_estimators": n_est_test})
+dml_pliv.set_ml_nuisance_params("ml_l", "d", {"n_estimators": n_est_test})
+dml_irm.set_ml_nuisance_params("ml_g0", "d", {"n_estimators": n_est_test})
+dml_iivm.set_ml_nuisance_params("ml_g0", "d", {"n_estimators": n_est_test})
+dml_cvar.set_ml_nuisance_params("ml_g", "d", {"n_estimators": n_est_test})
dml_plr.fit(store_models=True)
dml_pliv.fit(store_models=True)
@@ -46,59 +43,59 @@
dml_pq = DoubleMLPQ(dml_data_irm, ml_g=class_learner, ml_m=class_learner, n_folds=n_folds)
dml_lpq = DoubleMLLPQ(dml_data_iivm, ml_g=class_learner, ml_m=class_learner, n_folds=n_folds)
-dml_pq.set_ml_nuisance_params('ml_g', 'd', {'n_estimators': n_est_test})
-dml_lpq.set_ml_nuisance_params('ml_m_z', 'd', {'n_estimators': n_est_test})
+dml_pq.set_ml_nuisance_params("ml_g", "d", {"n_estimators": n_est_test})
+dml_lpq.set_ml_nuisance_params("ml_m_z", "d", {"n_estimators": n_est_test})
dml_pq.fit(store_models=True)
dml_lpq.fit(store_models=True)
def _assert_nuisance_params(dml_obj, learner_1, learner_2):
- assert dml_obj.params[learner_1]['d'] == test_values
- assert dml_obj.params[learner_2]['d'][0] is None
+ assert dml_obj.params[learner_1]["d"] == test_values
+ assert dml_obj.params[learner_2]["d"][0] is None
- param_list_1 = [dml_obj.models[learner_1]['d'][0][fold].n_estimators for fold in range(n_folds)]
+ param_list_1 = [dml_obj.models[learner_1]["d"][0][fold].n_estimators for fold in range(n_folds)]
assert all(param == n_est_test for param in param_list_1)
- param_list_2 = [dml_obj.models[learner_2]['d'][0][fold].n_estimators for fold in range(n_folds)]
+ param_list_2 = [dml_obj.models[learner_2]["d"][0][fold].n_estimators for fold in range(n_folds)]
assert all(param == n_est_default for param in param_list_2)
@pytest.mark.ci
def test_plr_params():
- _assert_nuisance_params(dml_plr, 'ml_l', 'ml_m')
+ _assert_nuisance_params(dml_plr, "ml_l", "ml_m")
@pytest.mark.ci
def test_pliv_params():
- _assert_nuisance_params(dml_pliv, 'ml_l', 'ml_m')
+ _assert_nuisance_params(dml_pliv, "ml_l", "ml_m")
@pytest.mark.ci
def test_irm_params():
- _assert_nuisance_params(dml_irm, 'ml_g0', 'ml_g1')
+ _assert_nuisance_params(dml_irm, "ml_g0", "ml_g1")
@pytest.mark.ci
def test_iivm_params():
- _assert_nuisance_params(dml_iivm, 'ml_g0', 'ml_g1')
+ _assert_nuisance_params(dml_iivm, "ml_g0", "ml_g1")
@pytest.mark.ci
def test_cvar_params():
- _assert_nuisance_params(dml_cvar, 'ml_g', 'ml_m')
+ _assert_nuisance_params(dml_cvar, "ml_g", "ml_m")
@pytest.mark.ci
def test_pq_params():
- _assert_nuisance_params(dml_pq, 'ml_g', 'ml_m')
+ _assert_nuisance_params(dml_pq, "ml_g", "ml_m")
@pytest.mark.ci
def test_lpq_params():
- _assert_nuisance_params(dml_lpq, 'ml_m_z', 'ml_m_d_z0')
- param_list_2 = [dml_lpq.models['ml_m_d_z1']['d'][0][fold].n_estimators for fold in range(n_folds)]
+ _assert_nuisance_params(dml_lpq, "ml_m_z", "ml_m_d_z0")
+ param_list_2 = [dml_lpq.models["ml_m_d_z1"]["d"][0][fold].n_estimators for fold in range(n_folds)]
assert all(param == n_est_default for param in param_list_2)
- param_list_2 = [dml_lpq.models['ml_g_du_z0']['d'][0][fold].n_estimators for fold in range(n_folds)]
+ param_list_2 = [dml_lpq.models["ml_g_du_z0"]["d"][0][fold].n_estimators for fold in range(n_folds)]
assert all(param == n_est_default for param in param_list_2)
- param_list_2 = [dml_lpq.models['ml_g_du_z1']['d'][0][fold].n_estimators for fold in range(n_folds)]
+ param_list_2 = [dml_lpq.models["ml_g_du_z1"]["d"][0][fold].n_estimators for fold in range(n_folds)]
assert all(param == n_est_default for param in param_list_2)
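The contract these tests fix, in short: set_ml_nuisance_params(learner, treat_var, params) tunes exactly one nuisance learner for one treatment variable, while every other learner keeps its defaults (sklearn's n_estimators=100 here). A sketch reusing the module-level objects defined in this file:

dml_plr_demo = DoubleMLPLR(dml_data_plr, RandomForestRegressor(), RandomForestRegressor(), n_folds=n_folds)
dml_plr_demo.set_ml_nuisance_params("ml_l", "d", {"n_estimators": n_est_test})
dml_plr_demo.fit(store_models=True)
assert all(m.n_estimators == n_est_test for m in dml_plr_demo.models["ml_l"]["d"][0])     # tuned
assert all(m.n_estimators == n_est_default for m in dml_plr_demo.models["ml_m"]["d"][0])  # default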
diff --git a/doubleml/tests/test_set_sample_splitting.py b/doubleml/tests/test_set_sample_splitting.py
index 4a1474a74..97313a00e 100644
--- a/doubleml/tests/test_set_sample_splitting.py
+++ b/doubleml/tests/test_set_sample_splitting.py
@@ -1,19 +1,16 @@
-import pytest
import numpy as np
+import pytest
+from sklearn.linear_model import Lasso
from doubleml import DoubleMLPLR
from doubleml.datasets import make_plr_CCDDHNR2018
-from sklearn.linear_model import Lasso
-
np.random.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs=10)
n_obs = dml_data.n_obs
ml_l = Lasso()
ml_m = Lasso()
-dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=7, n_rep=8,
- draw_sample_splitting=False)
+dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=7, n_rep=8, draw_sample_splitting=False)
def _assert_resampling_pars(dml_obj0, dml_obj1):
@@ -33,7 +30,6 @@ def _assert_smpls_equal(smpls0, smpls1):
@pytest.mark.ci
def test_doubleml_set_sample_splitting_tuple():
-
# no sample splitting
smpls = (np.arange(n_obs), np.arange(n_obs))
dml_plr.set_sample_splitting(smpls)
@@ -42,12 +38,12 @@ def test_doubleml_set_sample_splitting_tuple():
_assert_smpls_equal([[smpls]], dml_plr.smpls)
smpls = ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
- msg = 'Invalid partition provided. ' + 'Tuple provided that doesn\'t form a partition.'
+ msg = "Invalid partition provided. " + "Tuple provided that doesn't form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
smpls = ([0, 1, 2, 3, 4], [5, 6], [7, 8, 9])
- msg = 'Invalid partition provided. ' + 'Tuple for train_ind and test_ind must consist of exactly two elements.'
+ msg = "Invalid partition provided. " + "Tuple for train_ind and test_ind must consist of exactly two elements."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
@@ -55,29 +51,26 @@ def test_doubleml_set_sample_splitting_tuple():
@pytest.mark.ci
def test_doubleml_set_sample_splitting_all_tuple():
# sample splitting with two folds and cross-fitting but no repeated cross-fitting
- smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
+ smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
dml_plr.set_sample_splitting(smpls)
assert dml_plr.n_folds == 2
assert dml_plr.n_rep == 1
_assert_smpls_equal([smpls], dml_plr.smpls)
- smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2], [3, 4])]
- msg = 'Invalid partition provided. ' + 'All tuples for train_ind and test_ind must consist of exactly two elements.'
+ smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2], [3, 4])]
+ msg = "Invalid partition provided. " + "All tuples for train_ind and test_ind must consist of exactly two elements."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
# not valid partition
smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])]
- msg = 'Invalid partition provided. ' + 'Tuples provided that don\'t form a partition.'
+ msg = "Invalid partition provided. " + "Tuples provided that don't form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
# sample splitting with cross-fitting and two folds that do not form a partition
- smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8], [0, 1, 2, 3, 4, 9])]
- msg = 'Invalid partition provided. ' + 'Tuples provided that don\'t form a partition.'
+ smpls = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8], [0, 1, 2, 3, 4, 9])]
+ msg = "Invalid partition provided. " + "Tuples provided that don't form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
@@ -85,10 +78,10 @@ def test_doubleml_set_sample_splitting_all_tuple():
@pytest.mark.ci
def test_doubleml_set_sample_splitting_all_list():
# sample splitting with two folds and repeated cross-fitting with n_rep = 2
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
dml_plr.set_sample_splitting(smpls)
assert dml_plr.n_folds == 2
assert dml_plr.n_rep == 2
@@ -101,54 +94,47 @@ def test_doubleml_set_sample_splitting_all_list():
dml_plr.set_sample_splitting(smpls)
# second sample splitting is not a list
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- (([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8]))]
- msg = ('Invalid partition provided. '
- 'all_smpls is a list where neither all elements are tuples nor all elements are lists.')
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ (([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])),
+ ]
+ msg = "Invalid partition provided. all_smpls is a list where neither all elements are tuples nor all elements are lists."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [[[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]], # not a tuple
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'For repeated sample splitting all_smpls must be list of lists of tuples.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [[[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]], ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])], # not a tuple
+ ]
+ msg = "For repeated sample splitting all_smpls must be list of lists of tuples."
with pytest.raises(TypeError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4], [6, 8])]]
- msg = 'Invalid partition provided. ' + 'All tuples for train_ind and test_ind must consist of exactly two elements.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4], [6, 8])],
+ ]
+ msg = "Invalid partition provided. " + "All tuples for train_ind and test_ind must consist of exactly two elements."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 6], [4, 5, 7, 8, 9]),
- ([4, 5, 7, 8, 9], [0, 1, 2, 3, 6])],
- [([0, 1, 4, 5, 7, 9], [2, 3, 6, 8]),
- ([0, 2, 3, 4, 6, 8, 9], [1, 5, 7]),
- ([1, 2, 3, 5, 6, 7, 8], [0, 4, 9])]]
- msg = 'Invalid partition provided. ' + 'Different number of folds for repeated sample splitting.'
+ smpls = [
+ [([0, 1, 2, 3, 6], [4, 5, 7, 8, 9]), ([4, 5, 7, 8, 9], [0, 1, 2, 3, 6])],
+ [([0, 1, 4, 5, 7, 9], [2, 3, 6, 8]), ([0, 2, 3, 4, 6, 8, 9], [1, 5, 7]), ([1, 2, 3, 5, 6, 7, 8], [0, 4, 9])],
+ ]
+ msg = "Invalid partition provided. " + "Different number of folds for repeated sample splitting."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
# sample splitting with cross-fitting and two folds that do not form a partition
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8], [0, 1, 2, 3, 4, 9])]]
- msg = ('Invalid partition provided. '
- 'At least one inner list does not form a partition.')
+ smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8], [0, 1, 2, 3, 4, 9])]]
+ msg = "Invalid partition provided. At least one inner list does not form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
# repeated no-cross-fitting (does not form a partition)
- smpls = [[([0, 1, 5, 7, 9], [2, 3, 4, 6, 8])],
- [([2, 4, 7, 8, 9], [0, 1, 3, 5, 6])],
- [([0, 1, 4, 6, 8], [2, 3, 5, 7, 9])]]
- msg = ('Invalid partition provided. '
- 'At least one inner list does not form a partition.')
+ smpls = [[([0, 1, 5, 7, 9], [2, 3, 4, 6, 8])], [([2, 4, 7, 8, 9], [0, 1, 3, 5, 6])], [([0, 1, 4, 6, 8], [2, 3, 5, 7, 9])]]
+ msg = "Invalid partition provided. At least one inner list does not form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
@@ -158,42 +144,36 @@ def test_doubleml_draw_vs_set():
np.random.seed(3141)
dml_plr_set = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=7, n_rep=8)
- msg = 'n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold.'
+ msg = "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
with pytest.raises(ValueError, match=msg):
- _ = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=1, n_rep=1)
+ _ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=1, n_rep=1)
- dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=2, n_rep=1)
+ dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls)
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0])
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
- msg = 'Invalid partition provided. Tuple provided that doesn\'t form a partition.'
+ msg = "Invalid partition provided. Tuple provided that doesn't form a partition."
with pytest.raises(ValueError, match=msg):
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0][0])
- dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=2, n_rep=1)
+ dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls)
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0])
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
- dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=5, n_rep=1)
+ dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=5, n_rep=1)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls)
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0])
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
- dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=5, n_rep=3)
+ dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=5, n_rep=3)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls)
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
- dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m,
- n_folds=2, n_rep=4)
+ dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=4)
dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls)
_assert_resampling_pars(dml_plr_drawn, dml_plr_set)
@@ -201,58 +181,58 @@ def test_doubleml_draw_vs_set():
@pytest.mark.ci
def test_doubleml_set_sample_splitting_invalid_sets():
# sample splitting with two folds and repeated cross-fitting with n_rep = 2
- smpls = [[([0, 1.2, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'Invalid sample split. Train indices must be of type integer.'
+ smpls = [
+ [([0, 1.2, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = "Invalid sample split. Train indices must be of type integer."
with pytest.raises(TypeError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3.5, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'Invalid sample split. Test indices must be of type integer.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3.5, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = "Invalid sample split. Test indices must be of type integer."
with pytest.raises(TypeError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 3, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'Invalid sample split. Intersection of train and test indices is not empty.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 3, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = "Invalid sample split. Intersection of train and test indices is not empty."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'Invalid sample split. Train indices contain non-unique entries.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]), ([5, 6, 7, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = "Invalid sample split. Train indices contain non-unique entries."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, 5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = 'Invalid sample split. Test indices contain non-unique entries.'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, 5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = "Invalid sample split. Test indices contain non-unique entries."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 20], [5, 6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = r'Invalid sample split. Train indices must be in \[0, n_obs\).'
+ smpls = [
+ [([0, 1, 2, 3, 20], [5, 6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = r"Invalid sample split. Train indices must be in \[0, n_obs\)."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
- smpls = [[([0, 1, 2, 3, 4], [5, -6, 7, 8, 9]),
- ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
- [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
- ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
- msg = r'Invalid sample split. Test indices must be in \[0, n_obs\).'
+ smpls = [
+ [([0, 1, 2, 3, 4], [5, -6, 7, 8, 9]), ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])],
+ [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]), ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])],
+ ]
+ msg = r"Invalid sample split. Test indices must be in \[0, n_obs\)."
with pytest.raises(ValueError, match=msg):
dml_plr.set_sample_splitting(smpls)
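For reference, the three accepted input shapes for set_sample_splitting that the tests above walk through (indices are illustrative, n_obs=10 as in this file):

smpls_tuple = (np.arange(n_obs), np.arange(n_obs))   # single fold, train == test, no cross-fitting
smpls_flat = [([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]),    # list of tuples: n_folds=2, n_rep=1
              ([5, 6, 7, 8, 9], [0, 1, 2, 3, 4])]
smpls_nested = [smpls_flat,                          # list of lists of tuples: n_folds=2, n_rep=2
                [([0, 2, 4, 6, 8], [1, 3, 5, 7, 9]),
                 ([1, 3, 5, 7, 9], [0, 2, 4, 6, 8])]]
dml_plr.set_sample_splitting(smpls_nested)
assert (dml_plr.n_folds, dml_plr.n_rep) == (2, 2)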
diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py
index 5281435ec..386586ce9 100644
--- a/doubleml/utils/__init__.py
+++ b/doubleml/utils/__init__.py
@@ -2,13 +2,12 @@
The :mod:`doubleml.utils` module includes various utilities.
"""
-from .dummy_learners import DMLDummyRegressor
-from .dummy_learners import DMLDummyClassifier
-from .resampling import DoubleMLResampling, DoubleMLClusterResampling
from .blp import DoubleMLBLP
-from .policytree import DoubleMLPolicyTree
+from .dummy_learners import DMLDummyClassifier, DMLDummyRegressor
from .gain_statistics import gain_statistics
from .global_learner import GlobalClassifier, GlobalRegressor
+from .policytree import DoubleMLPolicyTree
+from .resampling import DoubleMLClusterResampling, DoubleMLResampling
__all__ = [
"DMLDummyRegressor",
@@ -19,5 +18,5 @@
"DoubleMLPolicyTree",
"gain_statistics",
"GlobalClassifier",
- "GlobalRegressor"
+ "GlobalRegressor",
]
diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py
index 8a546cb15..90833dedb 100644
--- a/doubleml/utils/_checks.py
+++ b/doubleml/utils/_checks.py
@@ -1,66 +1,54 @@
-import numpy as np
-import warnings
import inspect
+import warnings
+import numpy as np
from sklearn.utils.multiclass import type_of_target
def _check_in_zero_one(value, name, include_zero=True, include_one=True):
if not isinstance(value, float):
- raise TypeError(f'{name} must be of float type. '
- f'{str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(f"{name} must be of float type. {str(value)} of type {str(type(value))} was passed.")
if include_zero & include_one:
if (value < 0) | (value > 1):
- raise ValueError(f'{name} must be in [0,1]. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be in [0,1]. {str(value)} was passed.")
elif (not include_zero) & include_one:
if (value <= 0) | (value > 1):
- raise ValueError(f'{name} must be in (0,1]. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be in (0,1]. {str(value)} was passed.")
elif include_zero & (not include_one):
if (value < 0) | (value >= 1):
- raise ValueError(f'{name} must be in [0,1). '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be in [0,1). {str(value)} was passed.")
else:
if (value <= 0) | (value >= 1):
- raise ValueError(f'{name} must be in (0,1). '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be in (0,1). {str(value)} was passed.")
return
def _check_integer(value, name, lower_bound=None, upper_bound=None):
if not isinstance(value, int):
- raise TypeError(f'{name} must be an integer.'
- f' {str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(f"{name} must be an integer. {str(value)} of type {str(type(value))} was passed.")
if lower_bound is not None:
if value < lower_bound:
- raise ValueError(f'{name} must be larger or equal to {lower_bound}. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be larger or equal to {lower_bound}. {str(value)} was passed.")
if upper_bound is not None:
if value > upper_bound:
- raise ValueError(f'{name} must be smaller or equal to {upper_bound}. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be smaller or equal to {upper_bound}. {str(value)} was passed.")
return
def _check_float(value, name, lower_bound=None, upper_bound=None):
if not isinstance(value, float):
- raise TypeError(f'{name} must be of float type.'
- f' {str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(f"{name} must be of float type. {str(value)} of type {str(type(value))} was passed.")
if lower_bound is not None:
if value < lower_bound:
- raise ValueError(f'{name} must be larger or equal to {lower_bound}. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be larger or equal to {lower_bound}. {str(value)} was passed.")
if upper_bound is not None:
if value > upper_bound:
- raise ValueError(f'{name} must be smaller or equal to {upper_bound}. '
- f'{str(value)} was passed.')
+ raise ValueError(f"{name} must be smaller or equal to {upper_bound}. {str(value)} was passed.")
def _check_bool(value, name):
if not isinstance(value, bool):
- raise TypeError(f'{name} has to be boolean.'
- f' {str(value)} of type {str(type(value))} was passed.')
+ raise TypeError(f"{name} has to be boolean. {str(value)} of type {str(type(value))} was passed.")
def _check_is_partition(smpls, n_obs):
@@ -93,18 +81,18 @@ def _check_smpl_split_tpl(tpl, n_obs, check_intersect=False):
test_index = np.sort(np.array(tpl[1]))
if not issubclass(train_index.dtype.type, np.integer):
- raise TypeError('Invalid sample split. Train indices must be of type integer.')
+ raise TypeError("Invalid sample split. Train indices must be of type integer.")
if not issubclass(test_index.dtype.type, np.integer):
- raise TypeError('Invalid sample split. Test indices must be of type integer.')
+ raise TypeError("Invalid sample split. Test indices must be of type integer.")
if check_intersect:
if set(train_index) & set(test_index):
- raise ValueError('Invalid sample split. Intersection of train and test indices is not empty.')
+ raise ValueError("Invalid sample split. Intersection of train and test indices is not empty.")
if len(np.unique(train_index)) != len(train_index):
- raise ValueError('Invalid sample split. Train indices contain non-unique entries.')
+ raise ValueError("Invalid sample split. Train indices contain non-unique entries.")
if len(np.unique(test_index)) != len(test_index):
- raise ValueError('Invalid sample split. Test indices contain non-unique entries.')
+ raise ValueError("Invalid sample split. Test indices contain non-unique entries.")
# we sort the indices above
# if not np.all(np.diff(train_index) > 0):
@@ -113,9 +101,9 @@ def _check_smpl_split_tpl(tpl, n_obs, check_intersect=False):
# raise NotImplementedError('Invalid sample split. Only sorted test indices are supported.')
if not set(train_index).issubset(range(n_obs)):
- raise ValueError('Invalid sample split. Train indices must be in [0, n_obs).')
+ raise ValueError("Invalid sample split. Train indices must be in [0, n_obs).")
if not set(test_index).issubset(range(n_obs)):
- raise ValueError('Invalid sample split. Test indices must be in [0, n_obs).')
+ raise ValueError("Invalid sample split. Test indices must be in [0, n_obs).")
return train_index, test_index
@@ -123,135 +111,137 @@ def _check_smpl_split_tpl(tpl, n_obs, check_intersect=False):
def _check_finite_predictions(preds, learner, learner_name, smpls):
test_indices = np.concatenate([test_index for _, test_index in smpls])
if not np.all(np.isfinite(preds[test_indices])):
- raise ValueError(f'Predictions from learner {str(learner)} for {learner_name} are not finite.')
+ raise ValueError(f"Predictions from learner {str(learner)} for {learner_name} are not finite.")
return
def _check_score(score, valid_score, allow_callable=True):
if isinstance(score, str):
if score not in valid_score:
- raise ValueError('Invalid score ' + score + '. ' +
- 'Valid score ' + ' or '.join(valid_score) + '.')
+ raise ValueError("Invalid score " + score + ". " + "Valid score " + " or ".join(valid_score) + ".")
else:
if allow_callable:
if not callable(score):
- raise TypeError('score should be either a string or a callable. '
- f'{str(score)} was passed.')
+ raise TypeError(f"score should be either a string or a callable. {str(score)} was passed.")
else:
- raise TypeError('score should be a string. '
- f'{str(score)} was passed.')
+ raise TypeError(f"score should be a string. {str(score)} was passed.")
return
def _check_trimming(trimming_rule, trimming_threshold):
- valid_trimming_rule = ['truncate']
+ valid_trimming_rule = ["truncate"]
if trimming_rule not in valid_trimming_rule:
- raise ValueError('Invalid trimming_rule ' + str(trimming_rule) + '. ' +
- 'Valid trimming_rule ' + ' or '.join(valid_trimming_rule) + '.')
+ raise ValueError(
+ "Invalid trimming_rule "
+ + str(trimming_rule)
+ + ". "
+ + "Valid trimming_rule "
+ + " or ".join(valid_trimming_rule)
+ + "."
+ )
if not isinstance(trimming_threshold, float):
- raise TypeError('trimming_threshold has to be a float. ' +
- f'Object of type {str(type(trimming_threshold))} passed.')
+ raise TypeError("trimming_threshold has to be a float. " + f"Object of type {str(type(trimming_threshold))} passed.")
if (trimming_threshold <= 0) | (trimming_threshold >= 0.5):
- raise ValueError('Invalid trimming_threshold ' + str(trimming_threshold) + '. ' +
- 'trimming_threshold has to be between 0 and 0.5.')
+ raise ValueError(
+ "Invalid trimming_threshold " + str(trimming_threshold) + ". " + "trimming_threshold has to be between 0 and 0.5."
+ )
return
def _check_zero_one_treatment(obj_dml):
- one_treat = (obj_dml._dml_data.n_treat == 1)
- binary_treat = (type_of_target(obj_dml._dml_data.d) == 'binary')
+ one_treat = obj_dml._dml_data.n_treat == 1
+ binary_treat = type_of_target(obj_dml._dml_data.d) == "binary"
zero_one_treat = np.all((np.power(obj_dml._dml_data.d, 2) - obj_dml._dml_data.d) == 0)
if not (one_treat & binary_treat & zero_one_treat):
- raise ValueError('Incompatible data. '
- f'To fit an {str(obj_dml.score)} model with DML '
- 'exactly one binary variable with values 0 and 1 '
- 'needs to be specified as treatment variable.')
+ raise ValueError(
+ "Incompatible data. "
+ f"To fit an {str(obj_dml.score)} model with DML "
+ "exactly one binary variable with values 0 and 1 "
+ "needs to be specified as treatment variable."
+ )
def _check_treatment(treatment):
if not isinstance(treatment, int):
- raise TypeError('Treatment indicator has to be an integer. ' +
- f'Object of type {str(type(treatment))} passed.')
+ raise TypeError("Treatment indicator has to be an integer. " + f"Object of type {str(type(treatment))} passed.")
if (treatment != 0) & (treatment != 1):
- raise ValueError('Treatment indicator has be either 0 or 1. ' +
- f'Treatment indicator {str(treatment)} passed.')
+ raise ValueError("Treatment indicator has be either 0 or 1. " + f"Treatment indicator {str(treatment)} passed.")
return
def _check_quantile(quantile):
if not isinstance(quantile, float):
- raise TypeError('Quantile has to be a float. ' +
- f'Object of type {str(type(quantile))} passed.')
+ raise TypeError("Quantile has to be a float. " + f"Object of type {str(type(quantile))} passed.")
if (quantile <= 0) | (quantile >= 1):
- raise ValueError('Quantile has be between 0 or 1. ' +
- f'Quantile {str(quantile)} passed.')
+ raise ValueError("Quantile has be between 0 or 1. " + f"Quantile {str(quantile)} passed.")
return
def _check_contains_iv(obj_dml_data):
if obj_dml_data.z_cols is not None:
- raise ValueError('Incompatible data. ' +
- ' and '.join(obj_dml_data.z_cols) +
- ' have been set as instrumental variable(s). '
- 'To fit an local model see the documentation.')
+ raise ValueError(
+ "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). "
+ "To fit an local model see the documentation."
+ )
return
def _check_is_propensity(preds, learner, learner_name, smpls, eps=1e-12):
test_indices = np.concatenate([test_index for _, test_index in smpls])
if any((preds[test_indices] < eps) | (preds[test_indices] > 1 - eps)):
- warnings.warn(f'Propensity predictions from learner {str(learner)} for'
- f' {learner_name} are close to zero or one (eps={eps}).')
+ warnings.warn(
+ f"Propensity predictions from learner {str(learner)} for {learner_name} are close to zero or one (eps={eps})."
+ )
return
def _check_binary_predictions(pred, learner, learner_name, variable_name):
- binary_preds = (type_of_target(pred) == 'binary')
+ binary_preds = type_of_target(pred) == "binary"
zero_one_preds = np.all((np.power(pred, 2) - pred) == 0)
if binary_preds & zero_one_preds:
- raise ValueError(f'For the binary variable {variable_name}, '
- f'predictions obtained with the {learner_name} learner {str(learner)} are also '
- 'observed to be binary with values 0 and 1. Make sure that for classifiers '
- 'probabilities and not labels are predicted.')
+ raise ValueError(
+ f"For the binary variable {variable_name}, "
+ f"predictions obtained with the {learner_name} learner {str(learner)} are also "
+ "observed to be binary with values 0 and 1. Make sure that for classifiers "
+ "probabilities and not labels are predicted."
+ )
def _check_benchmarks(benchmarks):
if benchmarks is not None:
if not isinstance(benchmarks, dict):
- raise TypeError('benchmarks has to be either None or a dictionary. '
- f'{str(benchmarks)} of type {type(benchmarks)} was passed.')
- if not set(benchmarks.keys()) == {'cf_y', 'cf_d', 'name'}:
- raise ValueError('benchmarks has to be a dictionary with keys cf_y, cf_d and name. '
- f'Got {str(benchmarks.keys())}.')
+ raise TypeError(
+ f"benchmarks has to be either None or a dictionary. {str(benchmarks)} of type {type(benchmarks)} was passed."
+ )
+ if not set(benchmarks.keys()) == {"cf_y", "cf_d", "name"}:
+ raise ValueError(f"benchmarks has to be a dictionary with keys cf_y, cf_d and name. Got {str(benchmarks.keys())}.")
value_lengths = [len(value) for value in benchmarks.values()]
if not len(set(value_lengths)) == 1:
- raise ValueError('benchmarks has to be a dictionary with values of same length. '
- f'Got {str(value_lengths)}.')
- for i in (range(value_lengths[0])):
- for key in ['cf_y', 'cf_d']:
+ raise ValueError(f"benchmarks has to be a dictionary with values of same length. Got {str(value_lengths)}.")
+ for i in range(value_lengths[0]):
+ for key in ["cf_y", "cf_d"]:
_check_in_zero_one(benchmarks[key][i], f"benchmarks {key}", include_zero=True, include_one=False)
if not isinstance(benchmarks["name"][i], str):
- raise TypeError('benchmarks name must be of string type. '
- f'{str(benchmarks["name"][i])} of type {str(type(benchmarks["name"][i]))} was passed.')
+ raise TypeError(
+ "benchmarks name must be of string type. "
+ f"{str(benchmarks['name'][i])} of type {str(type(benchmarks['name'][i]))} was passed."
+ )
return
def _check_weights(weights, score, n_obs, n_rep):
if weights is not None:
-
# check general type
if (not isinstance(weights, np.ndarray)) and (not isinstance(weights, dict)):
- raise TypeError("weights must be a numpy array or dictionary. "
- f"weights of type {str(type(weights))} was passed.")
+ raise TypeError(f"weights must be a numpy array or dictionary. weights of type {str(type(weights))} was passed.")
# check shape
if isinstance(weights, np.ndarray):
if (weights.ndim != 1) or weights.shape[0] != n_obs:
- raise ValueError(f"weights must have shape ({n_obs},). "
- f"weights of shape {weights.shape} was passed.")
+ raise ValueError(f"weights must have shape ({n_obs},). weights of shape {weights.shape} was passed.")
if not np.all(0 <= weights):
raise ValueError("All weights values must be greater or equal 0.")
if weights.sum() == 0:
@@ -260,8 +250,9 @@ def _check_weights(weights, score, n_obs, n_rep):
# check special form for ATTE score
if score == "ATTE":
if not isinstance(weights, np.ndarray):
- raise TypeError("weights must be a numpy array for ATTE score. "
- f"weights of type {str(type(weights))} was passed.")
+ raise TypeError(
+ f"weights must be a numpy array for ATTE score. weights of type {str(type(weights))} was passed."
+ )
is_binary = np.all((np.power(weights, 2) - weights) == 0)
if not is_binary:
@@ -272,16 +263,18 @@ def _check_weights(weights, score, n_obs, n_rep):
assert score == "ATE"
expected_keys = ["weights", "weights_bar"]
if not set(weights.keys()) == set(expected_keys):
- raise ValueError(f"weights must have keys {expected_keys}. "
- f"keys {str(weights.keys())} were passed.")
+ raise ValueError(f"weights must have keys {expected_keys}. keys {str(weights.keys())} were passed.")
expected_shapes = [(n_obs,), (n_obs, n_rep)]
if weights["weights"].shape != expected_shapes[0]:
- raise ValueError(f"weights must have shape {expected_shapes[0]}. "
- f"weights of shape {weights['weights'].shape} was passed.")
+ raise ValueError(
+ f"weights must have shape {expected_shapes[0]}. weights of shape {weights['weights'].shape} was passed."
+ )
if weights["weights_bar"].shape != expected_shapes[1]:
- raise ValueError(f"weights_bar must have shape {expected_shapes[1]}. "
- f"weights_bar of shape {weights['weights_bar'].shape} was passed.")
+ raise ValueError(
+ f"weights_bar must have shape {expected_shapes[1]}. "
+ f"weights_bar of shape {weights['weights_bar'].shape} was passed."
+ )
if (not np.all(weights["weights"] >= 0)) or (not np.all(weights["weights_bar"] >= 0)):
raise ValueError("All weights values must be greater or equal 0.")
if (weights["weights"].sum() == 0) or (weights["weights_bar"].sum() == 0):
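A minimal sketch of the dictionary form checked here for score="ATE" (shapes follow expected_shapes above; the uniform values are placeholders):

import numpy as np

n_obs, n_rep = 100, 5
weights = {
    "weights": np.ones(n_obs),               # shape (n_obs,)
    "weights_bar": np.ones((n_obs, n_rep)),  # shape (n_obs, n_rep)
}
# Both arrays must be non-negative and must not sum to zero.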
@@ -292,76 +285,101 @@ def _check_weights(weights, score, n_obs, n_rep):
def _check_external_predictions(external_predictions, valid_treatments, valid_learners, n_obs, n_rep):
if external_predictions is not None:
if not isinstance(external_predictions, dict):
- raise TypeError('external_predictions must be a dictionary. '
- f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.')
+ raise TypeError(
+ "external_predictions must be a dictionary. "
+ f"{str(external_predictions)} of type {str(type(external_predictions))} was passed."
+ )
supplied_treatments = list(external_predictions.keys())
if not set(supplied_treatments).issubset(valid_treatments):
- raise ValueError('Invalid external_predictions. '
- f'Invalid treatment variable in {str(supplied_treatments)}. '
- 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.')
+ raise ValueError(
+ "Invalid external_predictions. "
+ f"Invalid treatment variable in {str(supplied_treatments)}. "
+ "Valid treatment variables " + " or ".join(valid_treatments) + "."
+ )
for treatment in supplied_treatments:
if not isinstance(external_predictions[treatment], dict):
- raise TypeError('external_predictions must be a nested dictionary. '
- f'For treatment {str(treatment)} a value of type '
- f'{str(type(external_predictions[treatment]))} was passed.')
+ raise TypeError(
+ "external_predictions must be a nested dictionary. "
+ f"For treatment {str(treatment)} a value of type "
+ f"{str(type(external_predictions[treatment]))} was passed."
+ )
supplied_learners = list(external_predictions[treatment].keys())
if not set(supplied_learners).issubset(valid_learners):
- raise ValueError('Invalid external_predictions. '
- f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. '
- 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.')
+ raise ValueError(
+ "Invalid external_predictions. "
+ f"Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. "
+ "Valid nuisance learners " + " or ".join(valid_learners) + "."
+ )
for learner in supplied_learners:
if not isinstance(external_predictions[treatment][learner], np.ndarray):
- raise TypeError('Invalid external_predictions. '
- 'The values of the nested list must be a numpy array. '
- 'Invalid predictions for treatment ' + str(treatment) +
- ' and learner ' + str(learner) + '. ' +
- f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.')
+ raise TypeError(
+ "Invalid external_predictions. "
+ "The values of the nested list must be a numpy array. "
+ "Invalid predictions for treatment "
+ + str(treatment)
+ + " and learner "
+ + str(learner)
+ + ". "
+ + f"Object of type {str(type(external_predictions[treatment][learner]))} was passed."
+ )
expected_shape = (n_obs, n_rep)
if external_predictions[treatment][learner].shape != expected_shape:
- raise ValueError('Invalid external_predictions. '
- f'The supplied predictions have to be of shape {str(expected_shape)}. '
- 'Invalid predictions for treatment ' + str(treatment) +
- ' and learner ' + str(learner) + '. ' +
- f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.')
+ raise ValueError(
+ "Invalid external_predictions. "
+ f"The supplied predictions have to be of shape {str(expected_shape)}. "
+ "Invalid predictions for treatment "
+ + str(treatment)
+ + " and learner "
+ + str(learner)
+ + ". "
+ + f"Predictions of shape {str(external_predictions[treatment][learner].shape)} passed."
+ )
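The nested structure accepted by _check_external_predictions, sketched with hypothetical treatment and learner names (valid names depend on the concrete model; every array must have shape (n_obs, n_rep)):

import numpy as np

n_obs, n_rep = 500, 1
external_predictions = {
    "d": {                                     # treatment variable name (illustrative)
        "ml_l": np.zeros((n_obs, n_rep)),      # outcome nuisance predictions
        "ml_m": np.full((n_obs, n_rep), 0.5),  # propensity predictions
    }
}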
def _check_bootstrap(method, n_rep_boot):
-
- if (not isinstance(method, str)) | (method not in ['Bayes', 'normal', 'wild']):
- raise ValueError('Method must be "Bayes", "normal" or "wild". '
- f'Got {str(method)}.')
+ if (not isinstance(method, str)) | (method not in ["Bayes", "normal", "wild"]):
+ raise ValueError(f'Method must be "Bayes", "normal" or "wild". Got {str(method)}.')
if not isinstance(n_rep_boot, int):
- raise TypeError('The number of bootstrap replications must be of int type. '
- f'{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed.')
+ raise TypeError(
+ "The number of bootstrap replications must be of int type. "
+ f"{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed."
+ )
if n_rep_boot < 1:
- raise ValueError('The number of bootstrap replications must be positive. '
- f'{str(n_rep_boot)} was passed.')
+ raise ValueError(f"The number of bootstrap replications must be positive. {str(n_rep_boot)} was passed.")
return
def _check_framework_compatibility(dml_framework_1, dml_framework_2, check_treatments=True):
if not dml_framework_1.n_obs == dml_framework_2.n_obs:
- raise ValueError('The number of observations in DoubleMLFrameworks must be the same. '
- f'Got {str(dml_framework_1.n_obs)} and {str(dml_framework_2.n_obs)}.')
+ raise ValueError(
+ "The number of observations in DoubleMLFrameworks must be the same. "
+ f"Got {str(dml_framework_1.n_obs)} and {str(dml_framework_2.n_obs)}."
+ )
if not dml_framework_1.n_rep == dml_framework_2.n_rep:
- raise ValueError('The number of replications in DoubleMLFrameworks must be the same. '
- f'Got {str(dml_framework_1.n_rep)} and {str(dml_framework_2.n_rep)}.')
+ raise ValueError(
+ "The number of replications in DoubleMLFrameworks must be the same. "
+ f"Got {str(dml_framework_1.n_rep)} and {str(dml_framework_2.n_rep)}."
+ )
if check_treatments:
if not dml_framework_1.n_thetas == dml_framework_2.n_thetas:
- raise ValueError('The number of parameters theta in DoubleMLFrameworks must be the same. '
- f'Got {str(dml_framework_1.n_thetas)} and {str(dml_framework_2.n_thetas)}.')
+ raise ValueError(
+ "The number of parameters theta in DoubleMLFrameworks must be the same. "
+ f"Got {str(dml_framework_1.n_thetas)} and {str(dml_framework_2.n_thetas)}."
+ )
if dml_framework_1._is_cluster_data != dml_framework_2._is_cluster_data:
- raise ValueError('The cluster structure in DoubleMLFrameworks must be the same. '
- f'Got {str(dml_framework_1._is_cluster_data)} and {str(dml_framework_2._is_cluster_data)}.')
+ raise ValueError(
+ "The cluster structure in DoubleMLFrameworks must be the same. "
+ f"Got {str(dml_framework_1._is_cluster_data)} and {str(dml_framework_2._is_cluster_data)}."
+ )
return
@@ -371,18 +389,17 @@ def _check_set(x):
def _check_resampling_specification(n_folds, n_rep):
if not isinstance(n_folds, int):
- raise TypeError('The number of folds must be of int type. '
- f'{str(n_folds)} of type {str(type(n_folds))} was passed.')
+ raise TypeError(f"The number of folds must be of int type. {str(n_folds)} of type {str(type(n_folds))} was passed.")
if n_folds < 2:
- raise ValueError('The number of folds greater or equal to 2. '
- f'{str(n_folds)} was passed.')
+ raise ValueError(f"The number of folds must be greater or equal to 2. {str(n_folds)} was passed.")
if not isinstance(n_rep, int):
- raise TypeError('The number of repetitions for the sample splitting must be of int type. '
- f'{str(n_rep)} of type {str(type(n_rep))} was passed.')
+ raise TypeError(
+ "The number of repetitions for the sample splitting must be of int type. "
+ f"{str(n_rep)} of type {str(type(n_rep))} was passed."
+ )
if n_rep < 1:
- raise ValueError('The number of repetitions for the sample splitting has to be positive. '
- f'{str(n_rep)} was passed.')
+ raise ValueError(f"The number of repetitions for the sample splitting has to be positive. {str(n_rep)} was passed.")
return
@@ -397,85 +414,86 @@ def _check_cluster_partitions(smpls, values):
def _check_cluster_sample_splitting(all_smpls_cluster, dml_data, n_rep, n_folds):
if all_smpls_cluster is None:
- raise ValueError('For cluster data, all_smpls_cluster must be provided.')
+ raise ValueError("For cluster data, all_smpls_cluster must be provided.")
n_rep_cluster = len(all_smpls_cluster)
if n_rep_cluster != n_rep:
- raise ValueError('Invalid samples provided. '
- 'Number of repetitions for all_smpls and all_smpls_cluster must be the same.')
+ raise ValueError(
+ "Invalid samples provided. Number of repetitions for all_smpls and all_smpls_cluster must be the same."
+ )
for i_rep in range(n_rep):
n_folds_cluster = len(all_smpls_cluster[i_rep])
if n_folds_cluster != n_folds:
- raise ValueError('Invalid samples provided. '
- 'Number of folds for all_smpls and all_smpls_cluster must be the same.')
+ raise ValueError("Invalid samples provided. Number of folds for all_smpls and all_smpls_cluster must be the same.")
for i_cluster in range(dml_data.n_cluster_vars):
this_cluster_var = dml_data.cluster_vars[:, i_cluster]
clusters = np.unique(this_cluster_var)
cluster_partition = [all_smpls_cluster[0][0][0][i_cluster], all_smpls_cluster[0][0][1][i_cluster]]
is_cluster_partition = _check_cluster_partitions(cluster_partition, clusters)
if not is_cluster_partition:
- raise ValueError('Invalid cluster partition provided. '
- 'At least one inner list does not form a partition.')
+ raise ValueError("Invalid cluster partition provided. At least one inner list does not form a partition.")
smpls_cluster = all_smpls_cluster
return smpls_cluster
def _check_sample_splitting(all_smpls, all_smpls_cluster, dml_data, is_cluster_data):
-
if isinstance(all_smpls, tuple):
if not len(all_smpls) == 2:
- raise ValueError('Invalid partition provided. '
- 'Tuple for train_ind and test_ind must consist of exactly two elements.')
+ raise ValueError(
+ "Invalid partition provided. Tuple for train_ind and test_ind must consist of exactly two elements."
+ )
all_smpls = _check_smpl_split_tpl(all_smpls, dml_data.n_obs)
- if (_check_is_partition([all_smpls], dml_data.n_obs) &
- _check_is_partition([(all_smpls[1], all_smpls[0])], dml_data.n_obs)):
+ if _check_is_partition([all_smpls], dml_data.n_obs) & _check_is_partition(
+ [(all_smpls[1], all_smpls[0])], dml_data.n_obs
+ ):
n_rep = 1
n_folds = 1
smpls = [[all_smpls]]
else:
- raise ValueError('Invalid partition provided. '
- 'Tuple provided that doesn\'t form a partition.')
+ raise ValueError("Invalid partition provided. Tuple provided that doesn't form a partition.")
else:
if not isinstance(all_smpls, list):
- raise TypeError('all_smpls must be of list or tuple type. '
- f'{str(all_smpls)} of type {str(type(all_smpls))} was passed.')
+ raise TypeError(
+ f"all_smpls must be of list or tuple type. {str(all_smpls)} of type {str(type(all_smpls))} was passed."
+ )
all_tuple = all([isinstance(tpl, tuple) for tpl in all_smpls])
if all_tuple:
if not all([len(tpl) == 2 for tpl in all_smpls]):
- raise ValueError('Invalid partition provided. '
- 'All tuples for train_ind and test_ind must consist of exactly two elements.')
+ raise ValueError(
+ "Invalid partition provided. All tuples for train_ind and test_ind must consist of exactly two elements."
+ )
n_rep = 1
all_smpls = _check_smpl_split(all_smpls, dml_data.n_obs)
if _check_is_partition(all_smpls, dml_data.n_obs):
- if ((len(all_smpls) == 1) &
- _check_is_partition([(all_smpls[0][1], all_smpls[0][0])], dml_data.n_obs)):
+ if (len(all_smpls) == 1) & _check_is_partition([(all_smpls[0][1], all_smpls[0][0])], dml_data.n_obs):
n_folds = 1
smpls = [all_smpls]
else:
n_folds = len(all_smpls)
smpls = _check_all_smpls([all_smpls], dml_data.n_obs, check_intersect=True)
else:
- raise ValueError('Invalid partition provided. '
- 'Tuples provided that don\'t form a partition.')
+ raise ValueError("Invalid partition provided. Tuples provided that don't form a partition.")
else:
all_list = all([isinstance(smpl, list) for smpl in all_smpls])
if not all_list:
- raise ValueError('Invalid partition provided. '
- 'all_smpls is a list where neither all elements are tuples '
- 'nor all elements are lists.')
+ raise ValueError(
+ "Invalid partition provided. "
+ "all_smpls is a list where neither all elements are tuples "
+ "nor all elements are lists."
+ )
all_tuple = all([all([isinstance(tpl, tuple) for tpl in smpl]) for smpl in all_smpls])
if not all_tuple:
- raise TypeError('For repeated sample splitting all_smpls must be list of lists of tuples.')
+ raise TypeError("For repeated sample splitting all_smpls must be list of lists of tuples.")
all_pairs = all([all([len(tpl) == 2 for tpl in smpl]) for smpl in all_smpls])
if not all_pairs:
- raise ValueError('Invalid partition provided. '
- 'All tuples for train_ind and test_ind must consist of exactly two elements.')
+ raise ValueError(
+ "Invalid partition provided. All tuples for train_ind and test_ind must consist of exactly two elements."
+ )
n_folds_each_smpl = np.array([len(smpl) for smpl in all_smpls])
if not np.all(n_folds_each_smpl == n_folds_each_smpl[0]):
- raise ValueError('Invalid partition provided. '
- 'Different number of folds for repeated sample splitting.')
+ raise ValueError("Invalid partition provided. Different number of folds for repeated sample splitting.")
all_smpls = _check_all_smpls(all_smpls, dml_data.n_obs)
smpls_are_partitions = [_check_is_partition(smpl, dml_data.n_obs) for smpl in all_smpls]
@@ -484,8 +502,7 @@ def _check_sample_splitting(all_smpls, all_smpls_cluster, dml_data, is_cluster_d
n_folds = int(n_folds_each_smpl[0])
smpls = _check_all_smpls(all_smpls, dml_data.n_obs, check_intersect=True)
else:
- raise ValueError('Invalid partition provided. '
- 'At least one inner list does not form a partition.')
+ raise ValueError("Invalid partition provided. At least one inner list does not form a partition.")
if is_cluster_data:
smpls_cluster = _check_cluster_sample_splitting(all_smpls_cluster, dml_data, n_rep, n_folds)
@@ -496,7 +513,9 @@ def _check_sample_splitting(all_smpls, all_smpls_cluster, dml_data, is_cluster_d
def _check_supports_sample_weights(learner, learner_name):
- if not ('sample_weight' in inspect.signature(learner.fit).parameters):
- raise ValueError(f"The {learner_name} learner {str(learner)} does not support sample weights. "
- "Please choose a learner that supports sample weights.")
+ if "sample_weight" not in inspect.signature(learner.fit).parameters:
+ raise ValueError(
+ f"The {learner_name} learner {str(learner)} does not support sample weights. "
+ "Please choose a learner that supports sample weights."
+ )
return
diff --git a/doubleml/utils/_descriptive.py b/doubleml/utils/_descriptive.py
index 54144bc8c..6868da677 100644
--- a/doubleml/utils/_descriptive.py
+++ b/doubleml/utils/_descriptive.py
@@ -3,9 +3,8 @@
def generate_summary(coef, se, t_stat, pval, ci, index_names=None):
- col_names = ['coef', 'std err', 't', 'P>|t|']
- summary_stats = np.transpose(np.vstack(
- [coef, se, t_stat, pval]))
+ col_names = ["coef", "std err", "t", "P>|t|"]
+ summary_stats = np.transpose(np.vstack([coef, se, t_stat, pval]))
df_summary = pd.DataFrame(summary_stats, columns=col_names)
if index_names is not None:
df_summary.index = index_names
diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py
index b88a75c7b..408c2f516 100644
--- a/doubleml/utils/_estimation.py
+++ b/doubleml/utils/_estimation.py
@@ -1,17 +1,14 @@
-import numpy as np
import warnings
-from scipy.optimize import minimize_scalar
-from sklearn.model_selection import cross_val_predict
+import numpy as np
+from joblib import Parallel, delayed
+from scipy.optimize import minimize_scalar
from sklearn.base import clone
+from sklearn.metrics import log_loss, root_mean_squared_error
+from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV, cross_val_predict
from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV
-from sklearn.metrics import root_mean_squared_error, log_loss
-
from statsmodels.nonparametric.kde import KDEUnivariate
-from joblib import Parallel, delayed
-
from ._checks import _check_is_partition
@@ -19,7 +16,7 @@ def _assure_2d_array(x):
if x.ndim == 1:
x = x.reshape(-1, 1)
elif x.ndim > 2:
- raise ValueError('Only one- or two-dimensional arrays are allowed')
+ raise ValueError("Only one- or two-dimensional arrays are allowed")
return x
@@ -46,17 +43,19 @@ def _fit(estimator, x, y, train_index, idx=None):
return estimator, idx
-def _dml_cv_predict(estimator, x, y, smpls=None,
- n_jobs=None, est_params=None, method='predict', return_train_preds=False, return_models=False):
+def _dml_cv_predict(
+ estimator, x, y, smpls=None, n_jobs=None, est_params=None, method="predict", return_train_preds=False, return_models=False
+):
n_obs = x.shape[0]
smpls_is_partition = _check_is_partition(smpls, n_obs)
fold_specific_params = (est_params is not None) & (not isinstance(est_params, dict))
fold_specific_target = isinstance(y, list)
- manual_cv_predict = (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target \
- | return_models
+ manual_cv_predict = (
+ (not smpls_is_partition) | return_train_preds | fold_specific_params | fold_specific_target | return_models
+ )
- res = {'models': None}
+ res = {"models": None}
if not manual_cv_predict:
if est_params is None:
# if there are no parameters set we redirect to the standard method
@@ -65,25 +64,24 @@ def _dml_cv_predict(estimator, x, y, smpls=None,
assert isinstance(est_params, dict)
# if no fold-specific parameters we redirect to the standard method
# warnings.warn("Using the same (hyper-)parameters for all folds")
- preds = cross_val_predict(clone(estimator).set_params(**est_params), x, y, cv=smpls, n_jobs=n_jobs,
- method=method)
- if method == 'predict_proba':
- res['preds'] = preds[:, 1]
+ preds = cross_val_predict(clone(estimator).set_params(**est_params), x, y, cv=smpls, n_jobs=n_jobs, method=method)
+ if method == "predict_proba":
+ res["preds"] = preds[:, 1]
else:
- res['preds'] = preds
- res['targets'] = np.copy(y)
+ res["preds"] = preds
+ res["targets"] = np.copy(y)
else:
if not smpls_is_partition:
- assert not fold_specific_target, 'combination of fold-specific y and no cross-fitting not implemented yet'
+ assert not fold_specific_target, "combination of fold-specific y and no cross-fitting not implemented yet"
assert len(smpls) == 1
- if method == 'predict_proba':
+ if method == "predict_proba":
assert not fold_specific_target # fold_specific_target only needed for PLIV.partialXZ
y = np.asarray(y)
le = LabelEncoder()
y = le.fit_transform(y)
- parallel = Parallel(n_jobs=n_jobs, verbose=0, pre_dispatch='2*n_jobs')
+ parallel = Parallel(n_jobs=n_jobs, verbose=0, pre_dispatch="2*n_jobs")
if fold_specific_target:
y_list = list()
@@ -96,19 +94,22 @@ def _dml_cv_predict(estimator, x, y, smpls=None,
y_list = [y] * len(smpls)
if est_params is None:
- fitted_models = parallel(delayed(_fit)(
- clone(estimator), x, y_list[idx], train_index, idx)
- for idx, (train_index, test_index) in enumerate(smpls))
+ fitted_models = parallel(
+ delayed(_fit)(clone(estimator), x, y_list[idx], train_index, idx)
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
elif isinstance(est_params, dict):
# warnings.warn("Using the same (hyper-)parameters for all folds")
- fitted_models = parallel(delayed(_fit)(
- clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx)
- for idx, (train_index, test_index) in enumerate(smpls))
+ fitted_models = parallel(
+ delayed(_fit)(clone(estimator).set_params(**est_params), x, y_list[idx], train_index, idx)
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
else:
- assert len(est_params) == len(smpls), 'provide one parameter setting per fold'
- fitted_models = parallel(delayed(_fit)(
- clone(estimator).set_params(**est_params[idx]), x, y_list[idx], train_index, idx)
- for idx, (train_index, test_index) in enumerate(smpls))
+ assert len(est_params) == len(smpls), "provide one parameter setting per fold"
+ fitted_models = parallel(
+ delayed(_fit)(clone(estimator).set_params(**est_params[idx]), x, y_list[idx], train_index, idx)
+ for idx, (train_index, test_index) in enumerate(smpls)
+ )
preds = np.full(n_obs, np.nan)
targets = np.full(n_obs, np.nan)
@@ -117,7 +118,7 @@ def _dml_cv_predict(estimator, x, y, smpls=None,
for idx, (train_index, test_index) in enumerate(smpls):
assert idx == fitted_models[idx][1]
pred_fun = getattr(fitted_models[idx][0], method)
- if method == 'predict_proba':
+ if method == "predict_proba":
preds[test_index] = pred_fun(x[test_index, :])[:, 1]
else:
preds[test_index] = pred_fun(x[test_index, :])
@@ -132,58 +133,60 @@ def _dml_cv_predict(estimator, x, y, smpls=None,
train_preds.append(pred_fun(x[train_index, :]))
train_targets.append(y[train_index])
- res['preds'] = preds
- res['targets'] = targets
+ res["preds"] = preds
+ res["targets"] = targets
if return_train_preds:
- res['train_preds'] = train_preds
- res['train_targets'] = train_targets
+ res["train_preds"] = train_preds
+ res["train_targets"] = train_targets
if return_models:
fold_ids = [xx[1] for xx in fitted_models]
if not np.all(fold_ids == np.arange(len(smpls))):
- raise RuntimeError('export of fitted models failed')
- res['models'] = [xx[0] for xx in fitted_models]
+ raise RuntimeError("export of fitted models failed")
+ res["models"] = [xx[0] for xx in fitted_models]
return res
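The fast path of _dml_cv_predict defers to sklearn's cross_val_predict; for classifiers it requests predict_proba and keeps only the positive-class column, as this self-contained sketch illustrates (data and learner are arbitrary):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_predict

X, y = make_classification(n_samples=200, random_state=42)
smpls = list(KFold(n_splits=5, shuffle=True, random_state=42).split(X))
# predict_proba returns shape (n_obs, n_classes); column 1 is P(y=1).
proba = cross_val_predict(LogisticRegression(max_iter=1000), X, y, cv=smpls, method="predict_proba")
preds = proba[:, 1]
assert preds.shape == (200,)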
-def _dml_tune(y, x, train_inds,
- learner, param_grid, scoring_method,
- n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search):
+def _dml_tune(
+ y, x, train_inds, learner, param_grid, scoring_method, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
+):
tune_res = list()
for train_index in train_inds:
tune_resampling = KFold(n_splits=n_folds_tune, shuffle=True)
- if search_mode == 'grid_search':
- g_grid_search = GridSearchCV(learner, param_grid,
- scoring=scoring_method,
- cv=tune_resampling, n_jobs=n_jobs_cv)
+ if search_mode == "grid_search":
+ g_grid_search = GridSearchCV(learner, param_grid, scoring=scoring_method, cv=tune_resampling, n_jobs=n_jobs_cv)
else:
- assert search_mode == 'randomized_search'
- g_grid_search = RandomizedSearchCV(learner, param_grid,
- scoring=scoring_method,
- cv=tune_resampling, n_jobs=n_jobs_cv,
- n_iter=n_iter_randomized_search)
+ assert search_mode == "randomized_search"
+ g_grid_search = RandomizedSearchCV(
+ learner,
+ param_grid,
+ scoring=scoring_method,
+ cv=tune_resampling,
+ n_jobs=n_jobs_cv,
+ n_iter=n_iter_randomized_search,
+ )
tune_res.append(g_grid_search.fit(x[train_index, :], y[train_index]))
return tune_res
def _draw_weights(method, n_rep_boot, n_obs):
- if method == 'Bayes':
- weights = np.random.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.
- elif method == 'normal':
+ if method == "Bayes":
+ weights = np.random.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.0
+ elif method == "normal":
weights = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
- elif method == 'wild':
+ elif method == "wild":
xx = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
yy = np.random.normal(loc=0.0, scale=1.0, size=(n_rep_boot, n_obs))
weights = xx / np.sqrt(2) + (np.power(yy, 2) - 1) / 2
else:
- raise ValueError('invalid boot method')
+ raise ValueError("invalid boot method")
return weights
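All three multiplier-bootstrap schemes in _draw_weights produce weights with mean 0 and variance 1; a quick numerical check mirroring the formulas above:

import numpy as np

np.random.seed(42)
n_rep_boot, n_obs = 1000, 500
bayes = np.random.exponential(scale=1.0, size=(n_rep_boot, n_obs)) - 1.0
xx = np.random.normal(size=(n_rep_boot, n_obs))
yy = np.random.normal(size=(n_rep_boot, n_obs))
wild = xx / np.sqrt(2) + (np.power(yy, 2) - 1) / 2
# Exponential(1) - 1 and the wild construction both have mean 0, variance 1.
print(round(bayes.mean(), 3), round(bayes.var(), 3))  # ~0.0, ~1.0
print(round(wild.mean(), 3), round(wild.var(), 3))    # ~0.0, ~1.0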
def _trimm(preds, trimming_rule, trimming_threshold):
- if trimming_rule == 'truncate':
+ if trimming_rule == "truncate":
preds[preds < trimming_threshold] = trimming_threshold
preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold
return preds
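The "truncate" rule in _trimm is equivalent to clipping estimated propensities into [threshold, 1 - threshold]; a one-line restatement with np.clip:

import numpy as np

preds = np.array([0.001, 0.30, 0.999])
threshold = 0.01
trimmed = np.clip(preds, threshold, 1 - threshold)  # same effect as the two assignments above
print(trimmed)  # [0.01 0.3  0.99]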
@@ -191,9 +194,10 @@ def _trimm(preds, trimming_rule, trimming_threshold):
def _normalize_ipw(propensity, treatment):
mean_treat1 = np.mean(np.divide(treatment, propensity))
- mean_treat0 = np.mean(np.divide(1.0-treatment, 1.0-propensity))
- normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) \
- + np.multiply(1.0-treatment, 1.0 - np.multiply(1.0-propensity, mean_treat0))
+ mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity))
+ normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) + np.multiply(
+ 1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)
+ )
return normalized_weights
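_normalize_ipw rescales the propensity so that the inverse-probability terms average to one within each treatment arm (a Hájek-type normalization); a small check reproducing the formula above on simulated data:

import numpy as np

np.random.seed(0)
n = 1000
propensity = np.random.uniform(0.2, 0.8, size=n)
treatment = np.random.binomial(1, propensity)
mean_treat1 = np.mean(treatment / propensity)
mean_treat0 = np.mean((1.0 - treatment) / (1.0 - propensity))
normalized = treatment * propensity * mean_treat1 + (1.0 - treatment) * (
    1.0 - (1.0 - propensity) * mean_treat0
)
# After normalization the IPW terms average to one in each arm:
print(np.mean(treatment / normalized))                  # ~1.0
print(np.mean((1.0 - treatment) / (1.0 - normalized)))  # ~1.0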
@@ -231,14 +235,14 @@ def _get_bracket_guess(score, coef_start, coef_bounds):
b_guess = (a, b)
f_a = score(b_guess[0])
f_b = score(b_guess[1])
- s_different = (np.sign(f_a) != np.sign(f_b))
+ s_different = np.sign(f_a) != np.sign(f_b)
delta += 0.1
return s_different, b_guess
def _default_kde(u, weights):
dens = KDEUnivariate(u)
- dens.fit(kernel='gau', bw='silverman', weights=weights, fft=False)
+ dens.fit(kernel="gau", bw="silverman", weights=weights, fft=False)
return dens.evaluate(0)
@@ -247,9 +251,7 @@ def _solve_ipw_score(ipw_score, bracket_guess):
def abs_ipw_score(theta):
return abs(ipw_score(theta))
- res = minimize_scalar(abs_ipw_score,
- bracket=bracket_guess,
- method='brent')
+ res = minimize_scalar(abs_ipw_score, bracket=bracket_guess, method="brent")
ipw_est = res.x
return ipw_est
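_get_bracket_guess and _solve_ipw_score work as a pair: widen a symmetric bracket around the start value until the score changes sign, then minimize the absolute score with Brent's method. A toy sketch (the score function and the stopping rule of the loop are invented for illustration):

import numpy as np
from scipy.optimize import minimize_scalar

def score(theta):  # toy score with a root at 0.5
    return theta - 0.5

coef_start, coef_bounds = 0.0, (-10.0, 10.0)
delta, s_different = 0.1, False
while not s_different and delta <= 2.0:  # illustrative stopping rule
    a = np.maximum(coef_start - delta, coef_bounds[0])
    b = np.minimum(coef_start + delta, coef_bounds[1])
    s_different = np.sign(score(a)) != np.sign(score(b))
    delta += 0.1
res = minimize_scalar(lambda t: abs(score(t)), bracket=(a, b), method="brent")
print(round(res.x, 3))  # ~0.5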
@@ -271,9 +273,7 @@ def _aggregate_coefs_and_ses(all_coefs, all_ses, var_scaling_factors):
return coefs, ses
-def _var_est(psi, psi_deriv, smpls, is_cluster_data,
- cluster_vars=None, smpls_cluster=None, n_folds_per_cluster=None):
-
+def _var_est(psi, psi_deriv, smpls, is_cluster_data, cluster_vars=None, smpls_cluster=None, n_folds_per_cluster=None):
if not is_cluster_data:
# psi and psi_deriv should be of shape (n_obs, ...)
var_scaling_factor = psi.shape[0]
@@ -299,7 +299,7 @@ def _var_est(psi, psi_deriv, smpls, is_cluster_data,
I_k = test_cluster_inds[0]
const = 1 / len(I_k)
for cluster_value in I_k:
- ind_cluster = (first_cluster_var == cluster_value)
+ ind_cluster = first_cluster_var == cluster_value
gamma_hat += const * np.sum(np.outer(psi[ind_cluster], psi[ind_cluster]))
j_hat += np.sum(psi_deriv[test_inds]) / len(I_k)
diff --git a/doubleml/utils/_plots.py b/doubleml/utils/_plots.py
index 791203444..67b449b38 100644
--- a/doubleml/utils/_plots.py
+++ b/doubleml/utils/_plots.py
@@ -2,79 +2,91 @@
import plotly.graph_objects as go
-def _sensitivity_contour_plot(x,
- y,
- contour_values,
- unadjusted_value,
- scenario_x,
- scenario_y,
- scenario_value,
- include_scenario,
- benchmarks=None,
- fill=True):
-
+def _sensitivity_contour_plot(
+ x,
+ y,
+ contour_values,
+ unadjusted_value,
+ scenario_x,
+ scenario_y,
+ scenario_value,
+ include_scenario,
+ benchmarks=None,
+ fill=True,
+):
if fill:
- text_col = 'white'
- contours_coloring = 'heatmap'
+ text_col = "white"
+ contours_coloring = "heatmap"
else:
- text_col = 'black'
- contours_coloring = 'lines'
+ text_col = "black"
+ contours_coloring = "lines"
# create figure
- axis_names = ['cf_d', 'cf_y ', 'Bound']
+ axis_names = ["cf_d", "cf_y ", "Bound"]
fig = go.Figure()
# basic contour plot
- hov_temp = axis_names[0] + ': %{x:.3f}' + '<br>' + axis_names[1] + ': %{y:.3f}' + '<b>' +\
- '<br>' + axis_names[2]
- fig.add_trace(go.Contour(z=contour_values,
- x=x,
- y=y,
- hovertemplate=hov_temp + ': %{z:.3f}' + '</b>',
- contours=dict(coloring=contours_coloring,
- showlabels=True,
- labelfont=dict(size=12, color=text_col)),
- name='Contour'))
+ hov_temp = axis_names[0] + ": %{x:.3f}" + "<br>" + axis_names[1] + ": %{y:.3f}" + "<b>" + "<br>" + axis_names[2]
+ fig.add_trace(
+ go.Contour(
+ z=contour_values,
+ x=x,
+ y=y,
+ hovertemplate=hov_temp + ": %{z:.3f}" + "</b>",
+ contours=dict(coloring=contours_coloring, showlabels=True, labelfont=dict(size=12, color=text_col)),
+ name="Contour",
+ )
+ )
if include_scenario:
- fig.add_trace(go.Scatter(x=[scenario_x],
- y=[scenario_y],
- mode="markers+text",
- marker=dict(size=10, color='red', line=dict(width=2, color=text_col)),
- hovertemplate=hov_temp + f': {round(scenario_value, 3)}' + '</b>',
- name='Scenario',
- textfont=dict(color=text_col, size=14),
- text=['Scenario'],
- textposition="top right",
- showlegend=False))
+ fig.add_trace(
+ go.Scatter(
+ x=[scenario_x],
+ y=[scenario_y],
+ mode="markers+text",
+ marker=dict(size=10, color="red", line=dict(width=2, color=text_col)),
+ hovertemplate=hov_temp + f": {round(scenario_value, 3)}" + "</b>",
+ name="Scenario",
+ textfont=dict(color=text_col, size=14),
+ text=["Scenario"],
+ textposition="top right",
+ showlegend=False,
+ )
+ )
# add unadjusted
- fig.add_trace(go.Scatter(x=[0],
- y=[0],
- mode="markers+text",
- marker=dict(size=10, color='red', line=dict(width=2, color=text_col)),
- hovertemplate=hov_temp + f': {round(unadjusted_value, 3)}' + '</b>',
- name='Unadjusted',
- text=['Unadjusted'],
- textfont=dict(color=text_col, size=14),
- textposition="top right",
- showlegend=False))
+ fig.add_trace(
+ go.Scatter(
+ x=[0],
+ y=[0],
+ mode="markers+text",
+ marker=dict(size=10, color="red", line=dict(width=2, color=text_col)),
+ hovertemplate=hov_temp + f": {round(unadjusted_value, 3)}" + "</b>",
+ name="Unadjusted",
+ text=["Unadjusted"],
+ textfont=dict(color=text_col, size=14),
+ textposition="top right",
+ showlegend=False,
+ )
+ )
# add benchmarks
if benchmarks is not None:
- fig.add_trace(go.Scatter(x=benchmarks['cf_d'],
- y=benchmarks['cf_y'],
- customdata=benchmarks['value'].reshape(-1, 1),
- mode="markers+text",
- marker=dict(size=10, color='red', line=dict(width=2, color=text_col)),
- hovertemplate=hov_temp + ': %{customdata[0]:.3f}' + '</b>',
- name="Benchmarks",
- textfont=dict(color=text_col, size=14),
- text=list(map(lambda s: "<b>" + s + "</b>", benchmarks['name'])),
- textposition="top right",
- showlegend=False))
- fig.update_layout(title=None,
- xaxis_title=axis_names[0],
- yaxis_title=axis_names[1])
+ fig.add_trace(
+ go.Scatter(
+ x=benchmarks["cf_d"],
+ y=benchmarks["cf_y"],
+ customdata=benchmarks["value"].reshape(-1, 1),
+ mode="markers+text",
+ marker=dict(size=10, color="red", line=dict(width=2, color=text_col)),
+ hovertemplate=hov_temp + ": %{customdata[0]:.3f}" + "</b>",
+ name="Benchmarks",
+ textfont=dict(color=text_col, size=14),
+ text=list(map(lambda s: "<b>" + s + "</b>", benchmarks["name"])),
+ textposition="top right",
+ showlegend=False,
+ )
+ )
+ fig.update_layout(title=None, xaxis_title=axis_names[0], yaxis_title=axis_names[1])
fig.update_xaxes(range=[0, np.max(x)])
fig.update_yaxes(range=[0, np.max(y)])
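Note that the hovertemplate strings rely on plotly's HTML subset ("<br>" for line breaks, "<b>...</b>" for bold); a stripped-down contour using the same convention, with a toy grid in place of the computed bounds:

import numpy as np
import plotly.graph_objects as go

x = y = np.linspace(0.0, 0.15, 10)
z = np.add.outer(y, x)  # placeholder bound values
hov = "cf_d: %{x:.3f}<br>cf_y: %{y:.3f}<br><b>Bound: %{z:.3f}</b>"
fig = go.Figure(go.Contour(z=z, x=x, y=y, hovertemplate=hov, name="Contour"))
# fig.show()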
diff --git a/doubleml/utils/blp.py b/doubleml/utils/blp.py
index 24bd807b7..13844ee77 100644
--- a/doubleml/utils/blp.py
+++ b/doubleml/utils/blp.py
@@ -1,10 +1,10 @@
-import statsmodels.api as sm
-import numpy as np
-import pandas as pd
import warnings
-from scipy.stats import norm
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
from scipy.linalg import sqrtm
+from scipy.stats import norm
class DoubleMLBLP:
@@ -26,26 +26,20 @@ class DoubleMLBLP:
Default is ``False``.
"""
- def __init__(self,
- orth_signal,
- basis,
- is_gate=False):
-
+ def __init__(self, orth_signal, basis, is_gate=False):
if not isinstance(orth_signal, np.ndarray):
- raise TypeError('The signal must be of np.ndarray type. '
- f'Signal of type {str(type(orth_signal))} was passed.')
+ raise TypeError(f"The signal must be of np.ndarray type. Signal of type {str(type(orth_signal))} was passed.")
if orth_signal.ndim != 1:
- raise ValueError('The signal must be of one dimensional. '
- f'Signal of dimensions {str(orth_signal.ndim)} was passed.')
+ raise ValueError(
+ f"The signal must be one-dimensional. Signal of dimensions {str(orth_signal.ndim)} was passed."
+ )
if not isinstance(basis, pd.DataFrame):
- raise TypeError('The basis must be of DataFrame type. '
- f'Basis of type {str(type(basis))} was passed.')
+ raise TypeError(f"The basis must be of DataFrame type. Basis of type {str(type(basis))} was passed.")
if not basis.columns.is_unique:
- raise ValueError('Invalid pd.DataFrame: '
- 'Contains duplicate column names.')
+ raise ValueError("Invalid pd.DataFrame: Contains duplicate column names.")
self._orth_signal = orth_signal
self._basis = basis
@@ -57,10 +51,9 @@ def __init__(self,
def __str__(self):
class_name = self.__class__.__name__
- header = f'================== {class_name} Object ==================\n'
+ header = f"================== {class_name} Object ==================\n"
fit_summary = str(self.summary)
- res = header + \
- '\n------------------ Fit summary ------------------\n' + fit_summary
+ res = header + "\n------------------ Fit summary ------------------\n" + fit_summary
return res
@property
@@ -96,21 +89,22 @@ def summary(self):
"""
A summary for the best linear predictor effect after calling :meth:`fit`.
"""
- col_names = ['coef', 'std err', 't', 'P>|t|', '[0.025', '0.975]']
+ col_names = ["coef", "std err", "t", "P>|t|", "[0.025", "0.975]"]
if self.blp_model is None:
df_summary = pd.DataFrame(columns=col_names)
else:
- summary_stats = {'coef': self.blp_model.params,
- 'std err': self.blp_model.bse,
- 't': self.blp_model.tvalues,
- 'P>|t|': self.blp_model.pvalues,
- '[0.025': self.blp_model.conf_int()[0],
- '0.975]': self.blp_model.conf_int()[1]}
- df_summary = pd.DataFrame(summary_stats,
- columns=col_names)
+ summary_stats = {
+ "coef": self.blp_model.params,
+ "std err": self.blp_model.bse,
+ "t": self.blp_model.tvalues,
+ "P>|t|": self.blp_model.pvalues,
+ "[0.025": self.blp_model.conf_int()[0],
+ "0.975]": self.blp_model.conf_int()[1],
+ }
+ df_summary = pd.DataFrame(summary_stats, columns=col_names)
return df_summary
- def fit(self, cov_type='HC0', **kwargs):
+ def fit(self, cov_type="HC0", **kwargs):
"""
Estimate DoubleMLBLP models.
@@ -164,25 +158,23 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
A data frame with the confidence interval(s).
"""
if not isinstance(joint, bool):
- raise TypeError('joint must be True or False. '
- f'Got {str(joint)}.')
+ raise TypeError(f"joint must be True or False. Got {str(joint)}.")
if not isinstance(level, float):
- raise TypeError('The confidence level must be of float type. '
- f'{str(level)} of type {str(type(level))} was passed.')
+ raise TypeError(f"The confidence level must be of float type. {str(level)} of type {str(type(level))} was passed.")
if (level <= 0) | (level >= 1):
- raise ValueError('The confidence level must be in (0,1). '
- f'{str(level)} was passed.')
+ raise ValueError(f"The confidence level must be in (0,1). {str(level)} was passed.")
if not isinstance(n_rep_boot, int):
- raise TypeError('The number of bootstrap replications must be of int type. '
- f'{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed.')
+ raise TypeError(
+ "The number of bootstrap replications must be of int type. "
+ f"{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed."
+ )
if n_rep_boot < 1:
- raise ValueError('The number of bootstrap replications must be positive. '
- f'{str(n_rep_boot)} was passed.')
+ raise ValueError(f"The number of bootstrap replications must be positive. {str(n_rep_boot)} was passed.")
if self._blp_model is None:
- raise ValueError('Apply fit() before confint().')
+ raise ValueError("Apply fit() before confint().")
alpha = 1 - level
gate_names = None
@@ -194,23 +186,26 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
gate_names = list(self._basis.columns.values)
else:
if joint:
- warnings.warn('Returning pointwise confidence intervals for basis coefficients.', UserWarning)
+ warnings.warn("Returning pointwise confidence intervals for basis coefficients.", UserWarning)
# return the confidence intervals for the basis coefficients
- ci = np.vstack((
- self.blp_model.conf_int(alpha=alpha/2)[0],
- self.blp_model.params,
- self.blp_model.conf_int(alpha=alpha/2)[1])
- ).T
+ ci = np.vstack(
+ (
+ self.blp_model.conf_int(alpha=alpha / 2)[0],
+ self.blp_model.params,
+ self.blp_model.conf_int(alpha=alpha / 2)[1],
+ )
+ ).T
df_ci = pd.DataFrame(
ci,
- columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
- index=self._basis.columns)
+ columns=["{:.1f} %".format(alpha / 2 * 100), "effect", "{:.1f} %".format((1 - alpha / 2) * 100)],
+ index=self._basis.columns,
+ )
return df_ci
elif not (basis.shape[1] == self._basis.shape[1]):
- raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.')
+ raise ValueError("Invalid basis: DataFrame has to have the exact same number and ordering of columns.")
elif not list(basis.columns.values) == list(self._basis.columns.values):
- raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.')
+ raise ValueError("Invalid basis: DataFrame has to have the exact same number and ordering of columns.")
# blp of the orthogonal signal
g_hat = self._blp_model.predict(basis)
@@ -222,8 +217,7 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
if joint:
# calculate the maximum t-statistic with bootstrap
normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
- bootstrap_samples = np.multiply(np.dot(np_basis, np.dot(sqrtm(self._blp_omega), normal_samples)).T,
- (1.0 / blp_se))
+ bootstrap_samples = np.multiply(np.dot(np_basis, np.dot(sqrtm(self._blp_omega), normal_samples)).T, (1.0 / blp_se))
max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
@@ -239,9 +233,11 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
g_hat_upper = g_hat + norm.ppf(q=1 - alpha / 2) * blp_se
ci = np.vstack((g_hat_lower, g_hat, g_hat_upper)).T
- df_ci = pd.DataFrame(ci,
- columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
- index=basis.index)
+ df_ci = pd.DataFrame(
+ ci,
+ columns=["{:.1f} %".format(alpha / 2 * 100), "effect", "{:.1f} %".format((1 - alpha / 2) * 100)],
+ index=basis.index,
+ )
if self._is_gate and gate_names is not None:
df_ci.index = gate_names
diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py
index 2fa233b33..482d45cc1 100644
--- a/doubleml/utils/gain_statistics.py
+++ b/doubleml/utils/gain_statistics.py
@@ -25,56 +25,76 @@ def gain_statistics(dml_long, dml_short):
sensitivity_elements_short = dml_short.framework.sensitivity_elements
if not isinstance(sensitivity_elements_long, dict):
- raise TypeError("dml_long does not contain the necessary sensitivity elements. "
- "Expected dict for dml_long.framework.sensitivity_elements.")
- expected_keys = ['sigma2', 'nu2']
+ raise TypeError(
+ "dml_long does not contain the necessary sensitivity elements. "
+ "Expected dict for dml_long.framework.sensitivity_elements."
+ )
+ expected_keys = ["sigma2", "nu2"]
if not all(key in sensitivity_elements_long.keys() for key in expected_keys):
- raise ValueError("dml_long does not contain the necessary sensitivity elements. "
- "Required keys are: " + str(expected_keys))
+ raise ValueError(
+ "dml_long does not contain the necessary sensitivity elements. Required keys are: " + str(expected_keys)
+ )
if not isinstance(sensitivity_elements_short, dict):
- raise TypeError("dml_short does not contain the necessary sensitivity elements. "
- "Expected dict for dml_short.framework.sensitivity_elements.")
+ raise TypeError(
+ "dml_short does not contain the necessary sensitivity elements. "
+ "Expected dict for dml_short.framework.sensitivity_elements."
+ )
if not all(key in sensitivity_elements_short.keys() for key in expected_keys):
- raise ValueError("dml_short does not contain the necessary sensitivity elements. "
- "Required keys are: " + str(expected_keys))
+ raise ValueError(
+ "dml_short does not contain the necessary sensitivity elements. Required keys are: " + str(expected_keys)
+ )
for key in expected_keys:
if not isinstance(sensitivity_elements_long[key], np.ndarray):
- raise TypeError("dml_long does not contain the necessary sensitivity elements. "
- f"Expected numpy.ndarray for key {key}.")
+ raise TypeError(
+ f"dml_long does not contain the necessary sensitivity elements. Expected numpy.ndarray for key {key}."
+ )
if not isinstance(sensitivity_elements_short[key], np.ndarray):
- raise TypeError("dml_short does not contain the necessary sensitivity elements. "
- f"Expected numpy.ndarray for key {key}.")
+ raise TypeError(
+ f"dml_short does not contain the necessary sensitivity elements. Expected numpy.ndarray for key {key}."
+ )
if len(sensitivity_elements_long[key].shape) != 3 or sensitivity_elements_long[key].shape[0] != 1:
- raise ValueError("dml_long does not contain the necessary sensitivity elements. "
- f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.")
+ raise ValueError(
+ "dml_long does not contain the necessary sensitivity elements. "
+ f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}."
+ )
if len(sensitivity_elements_short[key].shape) != 3 or sensitivity_elements_short[key].shape[0] != 1:
- raise ValueError("dml_short does not contain the necessary sensitivity elements. "
- f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.")
+ raise ValueError(
+ "dml_short does not contain the necessary sensitivity elements. "
+ f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}."
+ )
if not np.array_equal(sensitivity_elements_long[key].shape, sensitivity_elements_short[key].shape):
- raise ValueError("dml_long and dml_short do not contain the same shape of sensitivity elements. "
- "Shapes of " + key + " are: " + str(sensitivity_elements_long[key].shape) +
- " and " + str(sensitivity_elements_short[key].shape))
+ raise ValueError(
+ "dml_long and dml_short do not contain the same shape of sensitivity elements. "
+ "Shapes of "
+ + key
+ + " are: "
+ + str(sensitivity_elements_long[key].shape)
+ + " and "
+ + str(sensitivity_elements_short[key].shape)
+ )
if not isinstance(dml_long.all_coef, np.ndarray):
raise TypeError("dml_long.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.")
if not isinstance(dml_short.all_coef, np.ndarray):
raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.")
- expected_shape = (sensitivity_elements_long['sigma2'].shape[1], sensitivity_elements_long['sigma2'].shape[2])
+ expected_shape = (sensitivity_elements_long["sigma2"].shape[1], sensitivity_elements_long["sigma2"].shape[2])
if dml_long.all_coef.shape != expected_shape:
- raise ValueError("dml_long.all_coef does not contain the necessary coefficients. Expected shape: " +
- str(expected_shape))
+ raise ValueError(
+ "dml_long.all_coef does not contain the necessary coefficients. Expected shape: " + str(expected_shape)
+ )
if dml_short.all_coef.shape != expected_shape:
- raise ValueError("dml_short.all_coef does not contain the necessary coefficients. Expected shape: " +
- str(expected_shape))
+ raise ValueError(
+ "dml_short.all_coef does not contain the necessary coefficients. Expected shape: " + str(expected_shape)
+ )
# save elements for readability
var_y = np.var(dml_long._dml_data.y)
- var_y_residuals_long = np.squeeze(sensitivity_elements_long['sigma2'], axis=0)
- nu2_long = np.squeeze(sensitivity_elements_long['nu2'], axis=0)
- var_y_residuals_short = np.squeeze(sensitivity_elements_short['sigma2'], axis=0)
- nu2_short = np.squeeze(sensitivity_elements_short['nu2'], axis=0)
+ var_y_residuals_long = np.squeeze(sensitivity_elements_long["sigma2"], axis=0)
+ nu2_long = np.squeeze(sensitivity_elements_long["nu2"], axis=0)
+ var_y_residuals_short = np.squeeze(sensitivity_elements_short["sigma2"], axis=0)
+ nu2_short = np.squeeze(sensitivity_elements_short["nu2"], axis=0)
# compute nonparametric R2
R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y)
@@ -96,11 +116,9 @@ def gain_statistics(dml_long, dml_short):
var_riesz = nu2_long - nu2_short
denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0))
rho_sign = np.sign(all_delta_theta)
- rho_values = np.clip(np.divide(np.absolute(all_delta_theta),
- denom,
- out=np.ones_like(all_delta_theta),
- where=denom != 0),
- 0.0, 1.0)
+ rho_values = np.clip(
+ np.divide(np.absolute(all_delta_theta), denom, out=np.ones_like(all_delta_theta), where=denom != 0), 0.0, 1.0
+ )
all_rho_benchmark = np.multiply(rho_values, rho_sign)
rho_benchmark = np.median(all_rho_benchmark, axis=1)
benchmark_dict = {
diff --git a/doubleml/utils/global_learner.py b/doubleml/utils/global_learner.py
index 1a38062a3..0949f9562 100644
--- a/doubleml/utils/global_learner.py
+++ b/doubleml/utils/global_learner.py
@@ -1,5 +1,4 @@
-from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin, is_regressor, is_classifier, clone
-
+from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone, is_classifier, is_regressor
from sklearn.utils.multiclass import unique_labels
@@ -12,10 +11,10 @@ class GlobalRegressor(BaseEstimator, RegressorMixin):
base_estimator: regressor implementing ``fit()`` and ``predict()``
Regressor that is used when ``fit()``, ``predict()`` and ``predict_proba()`` are being called.
"""
- def __init__(self, base_estimator):
+ def __init__(self, base_estimator):
if not is_regressor(base_estimator):
- raise ValueError(f'base_estimator must be a regressor. Got {base_estimator.__class__.__name__} instead.')
+ raise ValueError(f"base_estimator must be a regressor. Got {base_estimator.__class__.__name__} instead.")
self.base_estimator = base_estimator
@@ -60,10 +59,10 @@ class GlobalClassifier(BaseEstimator, ClassifierMixin):
base_estimator: classifier implementing ``fit()`` and ``predict_proba()``
Classifier that is used when ``fit()``, ``predict()`` and ``predict_proba()`` are being called.
"""
- def __init__(self, base_estimator):
+ def __init__(self, base_estimator):
if not is_classifier(base_estimator):
- raise ValueError(f'base_estimator must be a classifier. Got {base_estimator.__class__.__name__} instead.')
+ raise ValueError(f"base_estimator must be a classifier. Got {base_estimator.__class__.__name__} instead.")
self.base_estimator = base_estimator
diff --git a/doubleml/utils/policytree.py b/doubleml/utils/policytree.py
index f3aaa6321..ca771ad62 100644
--- a/doubleml/utils/policytree.py
+++ b/doubleml/utils/policytree.py
@@ -1,6 +1,5 @@
import numpy as np
import pandas as pd
-
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.utils.validation import check_is_fitted
@@ -29,46 +28,37 @@ class DoubleMLPolicyTree:
"""
- def __init__(self,
- orth_signal,
- features,
- depth=2,
- **tree_params):
-
+ def __init__(self, orth_signal, features, depth=2, **tree_params):
if not isinstance(orth_signal, np.ndarray):
- raise TypeError('The signal must be of np.ndarray type. '
- f'Signal of type {str(type(orth_signal))} was passed.')
+ raise TypeError(f"The signal must be of np.ndarray type. Signal of type {str(type(orth_signal))} was passed.")
if orth_signal.ndim != 1:
- raise ValueError('The signal must be of one dimensional. '
- f'Signal of dimensions {str(orth_signal.ndim)} was passed.')
+ raise ValueError(
+ f"The signal must be one-dimensional. Signal of dimensions {str(orth_signal.ndim)} was passed."
+ )
if not isinstance(features, pd.DataFrame):
- raise TypeError('The features must be of DataFrame type. '
- f'Features of type {str(type(features))} was passed.')
+ raise TypeError(f"The features must be of DataFrame type. Features of type {str(type(features))} was passed.")
if not features.columns.is_unique:
- raise ValueError('Invalid pd.DataFrame: '
- 'Contains duplicate column names.')
+ raise ValueError("Invalid pd.DataFrame: Contains duplicate column names.")
self._orth_signal = orth_signal
self._features = features
self._depth = depth
self._tree_params = tree_params
- self._tree_params.setdefault("ccp_alpha", .01)
+ self._tree_params.setdefault("ccp_alpha", 0.01)
self._tree_params.setdefault("min_samples_leaf", 8)
# initialize tree
- self._policy_tree = DecisionTreeClassifier(max_depth=self._depth,
- **self._tree_params)
+ self._policy_tree = DecisionTreeClassifier(max_depth=self._depth, **self._tree_params)
def __str__(self):
class_name = self.__class__.__name__
- header = f'================== {class_name} Object ==================\n'
+ header = f"================== {class_name} Object ==================\n"
fit_summary = str(self.summary)
- res = header + \
- '\n------------------ Summary ------------------\n' + fit_summary
+ res = header + "\n------------------ Summary ------------------\n" + fit_summary
return res
@property
@@ -113,8 +103,7 @@ def fit(self):
# fit the tree with target binary score, sample weights absolute score and
# provided feature variables
- self._policy_tree.fit(X=self._features, y=bin_signal,
- sample_weight=abs_signal)
+ self._policy_tree.fit(X=self._features, y=bin_signal, sample_weight=abs_signal)
return self
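The fit above uses a standard reduction of policy learning to weighted classification: the sign of the orthogonal signal becomes the class label and its magnitude the sample weight. In isolation, with made-up signal values:

import numpy as np

orth_signal = np.array([0.8, -0.2, 1.5, -0.7])
bin_signal = (np.sign(orth_signal) + 1) / 2  # [1., 0., 1., 0.] -> treat / don't treat
abs_signal = np.abs(orth_signal)             # larger |signal| = higher sample weight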
@@ -126,10 +115,15 @@ def plot_tree(self):
-------
self : object
"""
- check_is_fitted(self._policy_tree, msg='Policy Tree not yet fitted. Call fit before plot_tree.')
-
- artists = plot_tree(self.policy_tree, feature_names=list(self._features.keys()), filled=True,
- class_names=["No Treatment", "Treatment"], impurity=False)
+ check_is_fitted(self._policy_tree, msg="Policy Tree not yet fitted. Call fit before plot_tree.")
+
+ artists = plot_tree(
+ self.policy_tree,
+ feature_names=list(self._features.keys()),
+ filled=True,
+ class_names=["No Treatment", "Treatment"],
+ impurity=False,
+ )
return artists
def predict(self, features):
@@ -147,15 +141,15 @@ def predict(self, features):
-------
self : object
"""
- check_is_fitted(self._policy_tree, msg='Policy Tree not yet fitted. Call fit before predict.')
+ check_is_fitted(self._policy_tree, msg="Policy Tree not yet fitted. Call fit before predict.")
if not isinstance(features, pd.DataFrame):
- raise TypeError('The features must be of DataFrame type. '
- f'Features of type {str(type(features))} was passed.')
+ raise TypeError(f"The features must be of DataFrame type. Features of type {str(type(features))} was passed.")
if not set(features.keys()) == set(self._features.keys()):
- raise KeyError(f'The features must have the keys {self._features.keys()}. '
- f'Features with keys {features.keys()} were passed.')
+ raise KeyError(
+ f"The features must have the keys {self._features.keys()}. Features with keys {features.keys()} were passed."
+ )
predictions = self.policy_tree.predict(features)
diff --git a/doubleml/utils/resampling.py b/doubleml/utils/resampling.py
index 63aec0eb1..188d2f248 100644
--- a/doubleml/utils/resampling.py
+++ b/doubleml/utils/resampling.py
@@ -1,22 +1,18 @@
import numpy as np
-
from sklearn.model_selection import KFold, RepeatedKFold, RepeatedStratifiedKFold
class DoubleMLResampling:
- def __init__(self,
- n_folds,
- n_rep,
- n_obs,
- stratify=None):
+ def __init__(self, n_folds, n_rep, n_obs, stratify=None):
self.n_folds = n_folds
self.n_rep = n_rep
self.n_obs = n_obs
self.stratify = stratify
if n_folds < 2:
- raise ValueError('n_folds must be greater than 1. '
- 'You can use set_sample_splitting with a tuple to only use one fold.')
+ raise ValueError(
+ "n_folds must be greater than 1. You can use set_sample_splitting with a tuple to only use one fold."
+ )
if self.stratify is None:
self.resampling = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep)
@@ -25,19 +21,12 @@ def __init__(self,
def split_samples(self):
all_smpls = [(train, test) for train, test in self.resampling.split(X=np.zeros(self.n_obs), y=self.stratify)]
- smpls = [all_smpls[(i_repeat * self.n_folds):((i_repeat + 1) * self.n_folds)]
- for i_repeat in range(self.n_rep)]
+ smpls = [all_smpls[(i_repeat * self.n_folds) : ((i_repeat + 1) * self.n_folds)] for i_repeat in range(self.n_rep)]
return smpls
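split_samples returns n_rep blocks of n_folds (train, test) pairs; the regrouping of sklearn's flat split list can be reproduced directly:

import numpy as np
from sklearn.model_selection import RepeatedKFold

n_folds, n_rep, n_obs = 5, 2, 20
rkf = RepeatedKFold(n_splits=n_folds, n_repeats=n_rep)
all_smpls = list(rkf.split(np.zeros(n_obs)))
# Slice the flat list into n_rep blocks of n_folds folds, as above.
smpls = [all_smpls[i * n_folds : (i + 1) * n_folds] for i in range(n_rep)]
assert len(smpls) == n_rep and all(len(s) == n_folds for s in smpls)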
class DoubleMLClusterResampling:
- def __init__(self,
- n_folds,
- n_rep,
- n_obs,
- n_cluster_vars,
- cluster_vars):
-
+ def __init__(self, n_folds, n_rep, n_obs, n_cluster_vars, cluster_vars):
self.n_folds = n_folds
self.n_rep = n_rep
self.n_obs = n_obs
@@ -57,14 +46,16 @@ def split_samples(self):
this_cluster_var = self.cluster_vars[:, i_var]
clusters = np.unique(this_cluster_var)
n_clusters = len(clusters)
- smpls_cluster_vars.append([(clusters[train], clusters[test])
- for train, test in self.resampling.split(np.zeros(n_clusters))])
+ smpls_cluster_vars.append(
+ [(clusters[train], clusters[test]) for train, test in self.resampling.split(np.zeros(n_clusters))]
+ )
smpls = []
smpls_cluster = []
# build the cartesian product
- cart = np.array(np.meshgrid(*[np.arange(self.n_folds)
- for i in range(self.n_cluster_vars)])).T.reshape(-1, self.n_cluster_vars)
+ cart = np.array(np.meshgrid(*[np.arange(self.n_folds) for i in range(self.n_cluster_vars)])).T.reshape(
+ -1, self.n_cluster_vars
+ )
for i_smpl in range(cart.shape[0]):
ind_train = np.full(self.n_obs, True)
ind_test = np.full(self.n_obs, True)
diff --git a/doubleml/utils/tests/_utils_blp_manual.py b/doubleml/utils/tests/_utils_blp_manual.py
index c64545aa3..923d9eea3 100644
--- a/doubleml/utils/tests/_utils_blp_manual.py
+++ b/doubleml/utils/tests/_utils_blp_manual.py
@@ -1,8 +1,8 @@
import numpy as np
+import pandas as pd
import statsmodels.api as sm
from scipy.linalg import sqrtm
from scipy.stats import norm
-import pandas as pd
def fit_blp(orth_signal, basis, cov_type, **kwargs):
@@ -38,8 +38,7 @@ def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
g_hat_upper = g_hat + norm.ppf(q=1 - alpha / 2) * blp_se
ci = np.vstack((g_hat_lower, g_hat, g_hat_upper)).T
- df_ci = pd.DataFrame(ci,
- columns=['{:.1f} %'.format(alpha / 2 * 100), 'effect',
- '{:.1f} %'.format((1 - alpha / 2) * 100)],
- index=basis.index)
+ df_ci = pd.DataFrame(
+ ci, columns=["{:.1f} %".format(alpha / 2 * 100), "effect", "{:.1f} %".format((1 - alpha / 2) * 100)], index=basis.index
+ )
return df_ci
diff --git a/doubleml/utils/tests/_utils_pt_manual.py b/doubleml/utils/tests/_utils_pt_manual.py
index 2af0d02f4..dabaf2c76 100644
--- a/doubleml/utils/tests/_utils_pt_manual.py
+++ b/doubleml/utils/tests/_utils_pt_manual.py
@@ -3,10 +3,8 @@
def fit_policytree(orth_signal, features, depth):
- policytree_model = DecisionTreeClassifier(max_depth=depth,
- ccp_alpha=.01,
- min_samples_leaf=8).fit(X=features,
- y=(np.sign(orth_signal) + 1) / 2,
- sample_weight=np.abs(orth_signal))
+ policytree_model = DecisionTreeClassifier(max_depth=depth, ccp_alpha=0.01, min_samples_leaf=8).fit(
+ X=features, y=(np.sign(orth_signal) + 1) / 2, sample_weight=np.abs(orth_signal)
+ )
return policytree_model
diff --git a/doubleml/utils/tests/test_blp.py b/doubleml/utils/tests/test_blp.py
index 38c1fff42..d2faa4401 100644
--- a/doubleml/utils/tests/test_blp.py
+++ b/doubleml/utils/tests/test_blp.py
@@ -1,44 +1,42 @@
+import copy
+
import numpy as np
import pandas as pd
import pytest
-import copy
import doubleml as dml
-from ._utils_blp_manual import fit_blp, blp_confint
+
+from ._utils_blp_manual import blp_confint, fit_blp
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def ci_joint(request):
return request.param
-@pytest.fixture(scope='module',
- params=[0.95, 0.9])
+@pytest.fixture(scope="module", params=[0.95, 0.9])
def ci_level(request):
return request.param
-@pytest.fixture(scope='module',
- params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
+@pytest.fixture(scope="module", params=["nonrobust", "HC0", "HC1", "HC2", "HC3"])
def cov_type(request):
return request.param
-@pytest.fixture(scope='module',
- params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def use_t(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_blp_fixture(ci_joint, ci_level, cov_type, use_t):
n = 50
- kwargs = {'cov_type': cov_type, 'use_t': use_t}
+ kwargs = {"cov_type": cov_type, "use_t": use_t}
np.random.seed(42)
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
- random_signal = np.random.normal(0, 1, size=(n, ))
+ random_signal = np.random.normal(0, 1, size=(n,))
blp = dml.DoubleMLBLP(random_signal, random_basis)
@@ -50,72 +48,67 @@ def dml_blp_fixture(ci_joint, ci_level, cov_type, use_t):
ci_1 = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
np.random.seed(42)
ci_2 = blp.confint(joint=ci_joint, level=ci_level, n_rep_boot=1000)
- expected_ci_2 = np.vstack((
- blp.blp_model.conf_int(alpha=(1-ci_level)/2)[0],
- blp.blp_model.params,
- blp.blp_model.conf_int(alpha=(1-ci_level)/2)[1])).T
+ expected_ci_2 = np.vstack(
+ (
+ blp.blp_model.conf_int(alpha=(1 - ci_level) / 2)[0],
+ blp.blp_model.params,
+ blp.blp_model.conf_int(alpha=(1 - ci_level) / 2)[1],
+ )
+ ).T
np.random.seed(42)
ci_manual = blp_confint(blp_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
- res_dict = {'coef': blp.blp_model.params,
- 'coef_manual': blp_manual.params,
- 'values': blp.blp_model.fittedvalues,
- 'values_manual': blp_manual.fittedvalues,
- 'omega': blp.blp_omega,
- 'omega_manual': blp_manual.cov_params().to_numpy(),
- 'basis': blp.basis,
- 'signal': blp.orth_signal,
- 'ci_1': ci_1,
- 'ci_2': ci_2,
- 'expected_ci_2': expected_ci_2,
- 'ci_manual': ci_manual,
- 'blp_model': blp,
- 'unfitted_blp_model': blp_obj}
+ res_dict = {
+ "coef": blp.blp_model.params,
+ "coef_manual": blp_manual.params,
+ "values": blp.blp_model.fittedvalues,
+ "values_manual": blp_manual.fittedvalues,
+ "omega": blp.blp_omega,
+ "omega_manual": blp_manual.cov_params().to_numpy(),
+ "basis": blp.basis,
+ "signal": blp.orth_signal,
+ "ci_1": ci_1,
+ "ci_2": ci_2,
+ "expected_ci_2": expected_ci_2,
+ "ci_manual": ci_manual,
+ "blp_model": blp,
+ "unfitted_blp_model": blp_obj,
+ }
return res_dict
@pytest.mark.ci
def test_dml_blp_coef(dml_blp_fixture):
- assert np.allclose(dml_blp_fixture['coef'],
- dml_blp_fixture['coef_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_blp_fixture["coef"], dml_blp_fixture["coef_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_blp_values(dml_blp_fixture):
- assert np.allclose(dml_blp_fixture['values'],
- dml_blp_fixture['values_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_blp_fixture["values"], dml_blp_fixture["values_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_blp_omega(dml_blp_fixture):
- assert np.allclose(dml_blp_fixture['omega'],
- dml_blp_fixture['omega_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_blp_fixture["omega"], dml_blp_fixture["omega_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_blp_ci_2(dml_blp_fixture):
- assert np.allclose(dml_blp_fixture['expected_ci_2'],
- dml_blp_fixture['ci_2'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_blp_fixture["expected_ci_2"], dml_blp_fixture["ci_2"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_blp_ci_1(dml_blp_fixture):
- assert np.allclose(dml_blp_fixture['ci_1'],
- dml_blp_fixture['ci_manual'],
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(dml_blp_fixture["ci_1"], dml_blp_fixture["ci_manual"], rtol=1e-9, atol=1e-4)
@pytest.mark.ci
def test_dml_blp_return_types(dml_blp_fixture):
- assert isinstance(dml_blp_fixture['blp_model'].__str__(), str)
- assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
- assert isinstance(dml_blp_fixture['unfitted_blp_model'].summary, pd.DataFrame)
+ assert isinstance(dml_blp_fixture["blp_model"].__str__(), str)
+ assert isinstance(dml_blp_fixture["blp_model"].summary, pd.DataFrame)
+ assert isinstance(dml_blp_fixture["unfitted_blp_model"].summary, pd.DataFrame)
@pytest.mark.ci
@@ -123,14 +116,12 @@ def test_dml_blp_defaults():
n = 50
np.random.seed(42)
random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
- random_signal = np.random.normal(0, 1, size=(n, ))
+ random_signal = np.random.normal(0, 1, size=(n,))
blp = dml.DoubleMLBLP(random_signal, random_basis)
blp.fit()
- assert np.allclose(blp.blp_omega,
- blp.blp_model.cov_HC0,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(blp.blp_omega, blp.blp_model.cov_HC0, rtol=1e-9, atol=1e-4)
assert blp._is_gate is False
@@ -143,41 +134,40 @@ def test_doubleml_exception_blp():
msg = "The signal must be of np.ndarray type. Signal of type was passed."
with pytest.raises(TypeError, match=msg):
dml.DoubleMLBLP(orth_signal=1, basis=random_basis)
- msg = 'The signal must be of one dimensional. Signal of dimensions 2 was passed.'
+ msg = "The signal must be of one dimensional. Signal of dimensions 2 was passed."
with pytest.raises(ValueError, match=msg):
dml.DoubleMLBLP(orth_signal=np.array([[1], [2]]), basis=random_basis)
msg = "The basis must be of DataFrame type. Basis of type was passed."
with pytest.raises(TypeError, match=msg):
dml.DoubleMLBLP(orth_signal=signal, basis=1)
- msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
+ msg = "Invalid pd.DataFrame: Contains duplicate column names."
with pytest.raises(ValueError, match=msg):
- dml.DoubleMLBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]),
- columns=['a_1', 'a_1']))
+ dml.DoubleMLBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]), columns=["a_1", "a_1"]))
dml_blp_confint = dml.DoubleMLBLP(orth_signal=signal, basis=random_basis)
- msg = r'Apply fit\(\) before confint\(\).'
+ msg = r"Apply fit\(\) before confint\(\)."
with pytest.raises(ValueError, match=msg):
dml_blp_confint.confint(random_basis)
dml_blp_confint.fit()
- msg = 'joint must be True or False. Got 1.'
+ msg = "joint must be True or False. Got 1."
with pytest.raises(TypeError, match=msg):
dml_blp_confint.confint(random_basis, joint=1)
msg = "The confidence level must be of float type. 5% of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_blp_confint.confint(random_basis, level='5%')
- msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+ dml_blp_confint.confint(random_basis, level="5%")
+ msg = r"The confidence level must be in \(0,1\). 0.0 was passed."
with pytest.raises(ValueError, match=msg):
- dml_blp_confint.confint(random_basis, level=0.)
+ dml_blp_confint.confint(random_basis, level=0.0)
msg = "The number of bootstrap replications must be of int type. 500 of type was passed."
with pytest.raises(TypeError, match=msg):
- dml_blp_confint.confint(random_basis, n_rep_boot='500')
- msg = 'The number of bootstrap replications must be positive. 0 was passed.'
+ dml_blp_confint.confint(random_basis, n_rep_boot="500")
+ msg = "The number of bootstrap replications must be positive. 0 was passed."
with pytest.raises(ValueError, match=msg):
dml_blp_confint.confint(random_basis, n_rep_boot=0)
- msg = 'Invalid basis: DataFrame has to have the exact same number and ordering of columns.'
+ msg = "Invalid basis: DataFrame has to have the exact same number and ordering of columns."
with pytest.raises(ValueError, match=msg):
- dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1], [4]]), columns=['a_1']))
- msg = 'Invalid basis: DataFrame has to have the exact same number and ordering of columns.'
+ dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1], [4]]), columns=["a_1"]))
+ msg = "Invalid basis: DataFrame has to have the exact same number and ordering of columns."
with pytest.raises(ValueError, match=msg):
- dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=['x_1', 'x_2', 'x_3']))
+ dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=["x_1", "x_2", "x_3"]))
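Note: the assertions condensed above all rely on `np.allclose(..., rtol=1e-9, atol=1e-4)`. For reference, that tolerance check is elementwise `|a - b| <= atol + rtol * |b|`:

```python
import numpy as np

a, b = np.array([1.00005]), np.array([1.0])
rtol, atol = 1e-9, 1e-4
print(np.allclose(a, b, rtol=rtol, atol=atol))  # True: 5e-5 <= 1e-4 + 1e-9
print(np.abs(a - b) <= atol + rtol * np.abs(b))  # [ True], the same check spelled out
```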
diff --git a/doubleml/utils/tests/test_dummy_learners.py b/doubleml/utils/tests/test_dummy_learners.py
index c23088faa..846166ff7 100644
--- a/doubleml/utils/tests/test_dummy_learners.py
+++ b/doubleml/utils/tests/test_dummy_learners.py
@@ -1,8 +1,9 @@
-import pytest
import numpy as np
-from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
+import pytest
from sklearn.base import clone
+from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
+
@pytest.fixture(scope="module")
def dl_fixture():
diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py
index 734185eb4..c434c689b 100644
--- a/doubleml/utils/tests/test_exceptions_gain_statistics.py
+++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py
@@ -1,15 +1,15 @@
-import pytest
import numpy as np
+import pytest
from doubleml.utils.gain_statistics import gain_statistics
-class test_framework():
+class test_framework:
def __init__(self, sensitivity_elements):
self.sensitivity_elements = sensitivity_elements
-class test_dml_class():
+class test_dml_class:
def __init__(self, sensitivity_elements, all_coef):
self.framework = test_framework(sensitivity_elements)
self.all_coef = all_coef
@@ -24,17 +24,16 @@ def __init__(self, sensitivity_elements, all_coef):
def test_doubleml_exception_data():
dml_correct = test_dml_class(
sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep, n_coef)),
},
- all_coef=np.random.normal(size=(n_rep, n_coef))
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
)
# incorrect types
dml_incorrect = test_dml_class(
- sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)),
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
+ sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), all_coef=np.random.normal(size=(n_rep, n_coef))
+ )
msg = r"dml_long does not contain the necessary sensitivity elements\. "
msg += r"Expected dict for dml_long\.framework\.sensitivity_elements\."
with pytest.raises(TypeError, match=msg):
@@ -46,11 +45,11 @@ def test_doubleml_exception_data():
# incorrect keys
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
msg = r"dml_long does not contain the necessary sensitivity elements\. Required keys are: \['sigma2', 'nu2'\]"
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
@@ -60,12 +59,9 @@ def test_doubleml_exception_data():
# incorrect type for keys
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': {},
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
+ sensitivity_elements={"sigma2": {}, "nu2": np.random.normal(size=(n_obs, n_rep, n_coef))},
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\."
with pytest.raises(TypeError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
@@ -74,11 +70,8 @@ def test_doubleml_exception_data():
_ = gain_statistics(dml_correct, dml_incorrect)
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': {}
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
+ sensitivity_elements={"sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)), "nu2": {}},
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
)
msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\."
with pytest.raises(TypeError, match=msg):
@@ -89,45 +82,53 @@ def test_doubleml_exception_data():
# incorrect shape for keys
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
- msg = (r"dml_long does not contain the necessary sensitivity elements\. "
- r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.")
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs + 1, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
+ msg = (
+ r"dml_long does not contain the necessary sensitivity elements\. "
+ r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\."
+ )
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
- msg = (r"dml_short does not contain the necessary sensitivity elements\. "
- r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.")
+ msg = (
+ r"dml_short does not contain the necessary sensitivity elements\. "
+ r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\."
+ )
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_correct, dml_incorrect)
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs + 1, n_rep, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
- msg = (r"dml_long does not contain the necessary sensitivity elements\. "
- r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.")
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs + 1, n_rep, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
+ msg = (
+ r"dml_long does not contain the necessary sensitivity elements\. "
+ r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\."
+ )
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
- msg = (r"dml_short does not contain the necessary sensitivity elements\. "
- r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.")
+ msg = (
+ r"dml_short does not contain the necessary sensitivity elements\. "
+ r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\."
+ )
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_correct, dml_incorrect)
# conflicting shape for keys
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep + 1, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. "
msg += r"Shapes of sigma2 are: \(1, 4, 5\) and \(1, 3, 5\)"
with pytest.raises(ValueError, match=msg):
@@ -138,12 +139,12 @@ def test_doubleml_exception_data():
_ = gain_statistics(dml_correct, dml_incorrect)
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep + 1, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef))
- )
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep + 1, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef)),
+ )
msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. "
msg += r"Shapes of nu2 are: \(1, 4, 5\) and \(1, 3, 5\)"
with pytest.raises(ValueError, match=msg):
@@ -155,12 +156,12 @@ def test_doubleml_exception_data():
# incorrect type for all_coef
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
- },
- all_coef={}
- )
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ },
+ all_coef={},
+ )
msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected numpy\.ndarray\."
with pytest.raises(TypeError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
@@ -170,12 +171,12 @@ def test_doubleml_exception_data():
# incorrect shape for all_coef
dml_incorrect = test_dml_class(
- sensitivity_elements={
- 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)),
- 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef))
- },
- all_coef=np.random.normal(size=(n_rep, n_coef + 1))
- )
+ sensitivity_elements={
+ "sigma2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ "nu2": np.random.normal(size=(n_obs, n_rep, n_coef)),
+ },
+ all_coef=np.random.normal(size=(n_rep, n_coef + 1)),
+ )
msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)"
with pytest.raises(ValueError, match=msg):
_ = gain_statistics(dml_incorrect, dml_correct)
diff --git a/doubleml/utils/tests/test_exceptions_global_learners.py b/doubleml/utils/tests/test_exceptions_global_learners.py
index ccd393222..0f601d70a 100644
--- a/doubleml/utils/tests/test_exceptions_global_learners.py
+++ b/doubleml/utils/tests/test_exceptions_global_learners.py
@@ -1,7 +1,7 @@
import pytest
-from sklearn.linear_model import LogisticRegression, LinearRegression
+from sklearn.linear_model import LinearRegression, LogisticRegression
-from doubleml.utils import GlobalRegressor, GlobalClassifier
+from doubleml.utils import GlobalClassifier, GlobalRegressor
@pytest.mark.ci
diff --git a/doubleml/utils/tests/test_global_learners.py b/doubleml/utils/tests/test_global_learners.py
index 9cae0941d..f549f71c6 100644
--- a/doubleml/utils/tests/test_global_learners.py
+++ b/doubleml/utils/tests/test_global_learners.py
@@ -1,28 +1,29 @@
-import pytest
import numpy as np
-from doubleml.utils import GlobalRegressor, GlobalClassifier
+import pytest
from sklearn.base import clone
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from doubleml.utils import GlobalClassifier, GlobalRegressor
-@pytest.fixture(scope='module',
- params=[LinearRegression(),
- RandomForestRegressor(n_estimators=10, max_depth=2, random_state=42)])
+
+@pytest.fixture(
+ scope="module", params=[LinearRegression(), RandomForestRegressor(n_estimators=10, max_depth=2, random_state=42)]
+)
def regressor(request):
return request.param
-@pytest.fixture(scope='module',
- params=[LogisticRegression(random_state=42),
- RandomForestClassifier(n_estimators=10, max_depth=2, random_state=42)])
+@pytest.fixture(
+ scope="module",
+ params=[LogisticRegression(random_state=42), RandomForestClassifier(n_estimators=10, max_depth=2, random_state=42)],
+)
def classifier(request):
return request.param
@pytest.fixture(scope="module")
def gl_fixture(regressor, classifier):
-
global_reg = GlobalRegressor(base_estimator=regressor)
weighted_reg = clone(regressor)
unweighted_reg = clone(regressor)
@@ -76,7 +77,7 @@ def gl_fixture(regressor, classifier):
"unweighted_clas_pred": unweighted_clas_pred,
"global_clas_pred_proba": global_clas_pred_proba,
"weighted_clas_pred_proba": weighted_clas_pred_proba,
- "unweighted_clas_pred_proba": unweighted_clas_pred_proba
+ "unweighted_clas_pred_proba": unweighted_clas_pred_proba,
}
return result_dict
diff --git a/doubleml/utils/tests/test_policytree.py b/doubleml/utils/tests/test_policytree.py
index 25f51a387..28c2ab7c2 100644
--- a/doubleml/utils/tests/test_policytree.py
+++ b/doubleml/utils/tests/test_policytree.py
@@ -1,27 +1,27 @@
+import copy
+
import numpy as np
import pandas as pd
import pytest
-import copy
+from sklearn.exceptions import NotFittedError
+from sklearn.tree import DecisionTreeClassifier
import doubleml as dml
from ._utils_pt_manual import fit_policytree
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.exceptions import NotFittedError
-@pytest.fixture(scope='module',
- params=[1, 2, 3])
+@pytest.fixture(scope="module", params=[1, 2, 3])
def depth(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def dml_policytree_fixture(depth):
n = 50
np.random.seed(42)
random_x_var = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
- random_signal = np.random.normal(0, 1, size=(n, ))
+ random_signal = np.random.normal(0, 1, size=(n,))
policy_tree = dml.DoubleMLPolicyTree(random_signal, random_x_var, depth)
@@ -31,61 +31,65 @@ def dml_policytree_fixture(depth):
np.random.seed(42)
policy_tree_manual = fit_policytree(random_signal, random_x_var, depth)
- res_dict = {'tree': policy_tree.policy_tree.tree_,
- 'tree_manual': policy_tree_manual.tree_,
- 'features': policy_tree.features,
- 'signal': policy_tree.orth_signal,
- 'policytree_model': policy_tree,
- 'unfitted_policytree_model': policy_tree_obj}
+ res_dict = {
+ "tree": policy_tree.policy_tree.tree_,
+ "tree_manual": policy_tree_manual.tree_,
+ "features": policy_tree.features,
+ "signal": policy_tree.orth_signal,
+ "policytree_model": policy_tree,
+ "unfitted_policytree_model": policy_tree_obj,
+ }
return res_dict
@pytest.mark.ci
def test_dml_policytree_treshold(dml_policytree_fixture):
- assert np.allclose(dml_policytree_fixture['tree'].threshold,
- dml_policytree_fixture['tree_manual'].threshold,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_policytree_fixture["tree"].threshold, dml_policytree_fixture["tree_manual"].threshold, rtol=1e-9, atol=1e-4
+ )
@pytest.mark.ci
def test_dml_policytree_children(dml_policytree_fixture):
- assert np.allclose(dml_policytree_fixture['tree'].children_left,
- dml_policytree_fixture['tree_manual'].children_left,
- rtol=1e-9, atol=1e-4)
- assert np.allclose(dml_policytree_fixture['tree'].children_right,
- dml_policytree_fixture['tree_manual'].children_right,
- rtol=1e-9, atol=1e-4)
+ assert np.allclose(
+ dml_policytree_fixture["tree"].children_left, dml_policytree_fixture["tree_manual"].children_left, rtol=1e-9, atol=1e-4
+ )
+ assert np.allclose(
+ dml_policytree_fixture["tree"].children_right,
+ dml_policytree_fixture["tree_manual"].children_right,
+ rtol=1e-9,
+ atol=1e-4,
+ )
@pytest.mark.ci
def test_dml_policytree_return_types(dml_policytree_fixture):
- assert isinstance(dml_policytree_fixture['policytree_model'].__str__(), str)
- assert isinstance(dml_policytree_fixture['policytree_model'].summary, pd.DataFrame)
- assert isinstance(dml_policytree_fixture['policytree_model'].policy_tree, DecisionTreeClassifier)
+ assert isinstance(dml_policytree_fixture["policytree_model"].__str__(), str)
+ assert isinstance(dml_policytree_fixture["policytree_model"].summary, pd.DataFrame)
+ assert isinstance(dml_policytree_fixture["policytree_model"].policy_tree, DecisionTreeClassifier)
@pytest.mark.ci
def test_doubleml_exception_policytree():
- random_features = pd.DataFrame(np.random.normal(0, 1, size=(2, 3)), columns=['a', 'b', 'c'])
+ random_features = pd.DataFrame(np.random.normal(0, 1, size=(2, 3)), columns=["a", "b", "c"])
signal = np.array([1, 2])
msg = "The signal must be of np.ndarray type. Signal of type was passed."
with pytest.raises(TypeError, match=msg):
dml.DoubleMLPolicyTree(orth_signal=1, features=random_features)
- msg = 'The signal must be of one dimensional. Signal of dimensions 2 was passed.'
+ msg = "The signal must be of one dimensional. Signal of dimensions 2 was passed."
with pytest.raises(ValueError, match=msg):
dml.DoubleMLPolicyTree(orth_signal=np.array([[1], [2]]), features=random_features)
msg = "The features must be of DataFrame type. Features of type was passed."
with pytest.raises(TypeError, match=msg):
dml.DoubleMLPolicyTree(orth_signal=signal, features=1)
- msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
+ msg = "Invalid pd.DataFrame: Contains duplicate column names."
with pytest.raises(ValueError, match=msg):
- dml.DoubleMLPolicyTree(orth_signal=signal, features=pd.DataFrame(np.array([[1, 2], [4, 5]]),
- columns=['a_1', 'a_1']))
+ dml.DoubleMLPolicyTree(orth_signal=signal, features=pd.DataFrame(np.array([[1, 2], [4, 5]]), columns=["a_1", "a_1"]))
dml_policytree_predict = dml.DoubleMLPolicyTree(orth_signal=signal, features=random_features)
- msg = 'Policy Tree not yet fitted. Call fit before predict.'
+ msg = "Policy Tree not yet fitted. Call fit before predict."
with pytest.raises(NotFittedError, match=msg):
dml_policytree_predict.predict(random_features)
@@ -93,12 +97,14 @@ def test_doubleml_exception_policytree():
msg = "The features must be of DataFrame type. Features of type was passed."
with pytest.raises(TypeError, match=msg):
dml_policytree_predict.predict(features=1)
- msg = (r'The features must have the keys Index\(\[\'a\', \'b\', \'c\'\], dtype\=\'object\'\). '
- r'Features with keys Index\(\[\'d\'\], dtype=\'object\'\) were passed.')
+ msg = (
+ r"The features must have the keys Index\(\[\'a\', \'b\', \'c\'\], dtype\=\'object\'\). "
+ r"Features with keys Index\(\[\'d\'\], dtype=\'object\'\) were passed."
+ )
with pytest.raises(KeyError, match=msg):
dml_policytree_predict.predict(features=pd.DataFrame({"d": [3, 4]}))
dml_policytree_plot = dml.DoubleMLPolicyTree(orth_signal=signal, features=random_features)
- msg = 'Policy Tree not yet fitted. Call fit before plot_tree.'
+ msg = "Policy Tree not yet fitted. Call fit before plot_tree."
with pytest.raises(NotFittedError, match=msg):
dml_policytree_plot.plot_tree()
diff --git a/doubleml/utils/tests/test_var_est_and_aggregation.py b/doubleml/utils/tests/test_var_est_and_aggregation.py
index c209c513b..969864b4b 100644
--- a/doubleml/utils/tests/test_var_est_and_aggregation.py
+++ b/doubleml/utils/tests/test_var_est_and_aggregation.py
@@ -1,22 +1,20 @@
-import pytest
import numpy as np
+import pytest
-from doubleml.utils._estimation import _var_est, _aggregate_coefs_and_ses
+from doubleml.utils._estimation import _aggregate_coefs_and_ses, _var_est
-@pytest.fixture(scope='module',
- params=[1, 3])
+@pytest.fixture(scope="module", params=[1, 3])
def n_rep(request):
return request.param
-@pytest.fixture(scope='module',
- params=[1, 5])
+@pytest.fixture(scope="module", params=[1, 5])
def n_coefs(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def test_var_est_and_aggr_fixture(n_rep, n_coefs):
np.random.seed(42)
@@ -28,18 +26,12 @@ def test_var_est_and_aggr_fixture(n_rep, n_coefs):
for i_coef in range(n_coefs):
n_obs = np.random.randint(100, 200)
for i_rep in range(n_rep):
-
psi = np.random.normal(size=(n_obs))
psi_deriv = np.ones((n_obs))
all_thetas[i_coef, i_rep] = np.mean(psi)
- var_estimate, var_scaling_factor = _var_est(
- psi=psi,
- psi_deriv=psi_deriv,
- smpls=None,
- is_cluster_data=False
- )
+ var_estimate, var_scaling_factor = _var_est(psi=psi, psi_deriv=psi_deriv, smpls=None, is_cluster_data=False)
all_ses[i_coef, i_rep] = np.sqrt(var_estimate)
all_var_scaling_factors[i_coef, i_rep] = var_scaling_factor
@@ -48,8 +40,9 @@ def test_var_est_and_aggr_fixture(n_rep, n_coefs):
for i_coef in range(n_coefs):
for i_rep in range(n_rep):
theta_deviation = np.square(all_thetas[i_coef, i_rep] - expected_theta[i_coef])
- expected_all_var[i_coef, i_rep] = np.square(all_ses[i_coef, i_rep]) + \
- np.divide(theta_deviation, all_var_scaling_factors[i_coef, i_rep])
+ expected_all_var[i_coef, i_rep] = np.square(all_ses[i_coef, i_rep]) + np.divide(
+ theta_deviation, all_var_scaling_factors[i_coef, i_rep]
+ )
expected_se = np.sqrt(np.median(expected_all_var, axis=1))
@@ -68,36 +61,24 @@ def test_var_est_and_aggr_fixture(n_rep, n_coefs):
)
result_dict = {
- 'theta': theta,
- 'se': se,
- 'theta_2': theta_2,
- 'se_2': se_2,
- 'expected_theta': expected_theta,
- 'expected_se': expected_se,
- 'all_var_scaling_factors': all_var_scaling_factors,
+ "theta": theta,
+ "se": se,
+ "theta_2": theta_2,
+ "se_2": se_2,
+ "expected_theta": expected_theta,
+ "expected_se": expected_se,
+ "all_var_scaling_factors": all_var_scaling_factors,
}
return result_dict
@pytest.mark.ci
def test_aggregate_theta(test_var_est_and_aggr_fixture):
- assert np.allclose(
- test_var_est_and_aggr_fixture['theta'],
- test_var_est_and_aggr_fixture['expected_theta']
- )
- assert np.allclose(
- test_var_est_and_aggr_fixture['theta_2'],
- test_var_est_and_aggr_fixture['expected_theta']
- )
+ assert np.allclose(test_var_est_and_aggr_fixture["theta"], test_var_est_and_aggr_fixture["expected_theta"])
+ assert np.allclose(test_var_est_and_aggr_fixture["theta_2"], test_var_est_and_aggr_fixture["expected_theta"])
@pytest.mark.ci
def test_aggregate_se(test_var_est_and_aggr_fixture):
- assert np.allclose(
- test_var_est_and_aggr_fixture['se'],
- test_var_est_and_aggr_fixture['expected_se']
- )
- assert np.allclose(
- test_var_est_and_aggr_fixture['se_2'],
- test_var_est_and_aggr_fixture['expected_se']
- )
+ assert np.allclose(test_var_est_and_aggr_fixture["se"], test_var_est_and_aggr_fixture["expected_se"])
+ assert np.allclose(test_var_est_and_aggr_fixture["se_2"], test_var_est_and_aggr_fixture["expected_se"])
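Note: the fixture reformatted above aggregates repetitions by taking the median point estimate and inflating each repetition's variance by its squared deviation from that median, scaled by the variance scaling factor. A compact restatement with assumed toy numbers:

```python
import numpy as np

all_thetas = np.array([1.0, 1.2, 0.8])  # made-up per-repetition estimates
all_ses = np.array([0.10, 0.12, 0.11])  # made-up per-repetition standard errors
scaling = np.array([150.0, 150.0, 150.0])  # made-up variance scaling factors

theta = np.median(all_thetas)  # aggregated point estimate
all_var = np.square(all_ses) + np.square(all_thetas - theta) / scaling
se = np.sqrt(np.median(all_var))  # aggregated standard error
print(theta, se)
```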
diff --git a/pyproject.toml b/pyproject.toml
index aa35b6212..a8d359f37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,10 +41,44 @@ rdd = [
dev = [
"pytest",
"xgboost",
- "lightgbm"
+ "lightgbm",
+ "black>=24.3.0",
+ "ruff>=0.5.1",
+ "pre-commit>=4.0.1",
+]
+
+[tool.black]
+line-length = 127
+target-version = ['py39', 'py310', 'py311']
+preview = true
+exclude = '''
+/(
+ \.eggs # exclude a few common directories in the
+ | \.git # root of the project
+ | \.mypy_cache
+ | \.vscode
+ | build
+ | dist
+ | doc/_build
+)/
+'''
+
+[tool.ruff]
+# max line length, kept in sync with black
+line-length = 127
+target-version = "py39"
+
+[tool.ruff.lint]
+# all rules can be found here: https://docs.astral.sh/ruff/rules/
+select = ["E", "F", "W", "I"]
+ignore = [
+ # Use `is` and `is not` for type comparisons, or `isinstance()` for
+ # isinstance checks
+ "E721",
]
[project.urls]
Documentation = "https://docs.doubleml.org"
Source = "https://github.com/DoubleML/doubleml-for-py"
-"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"
\ No newline at end of file
+"Bug Tracker" = "https://github.com/DoubleML/doubleml-for-py/issues"
diff --git a/pytest.ini b/pytest.ini
index 696d3343a..3582830cb 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -14,4 +14,4 @@ filterwarnings =
ignore:.*Propensity score is close to 0 or 1. Trimming is at 0.01 and 0.99 is applied.*:UserWarning
ignore:.*Sensitivity analysis not implemented for callable scores.*:UserWarning
ignore:.*Subsample has not common support. Results are based on adjusted propensities.*:UserWarning
- ignore:.*Treatment probability within bandwidth left from cutoff higher than right from cutoff.\nTreatment assignment might be based on the wrong side of the cutoff.*:UserWarning
\ No newline at end of file
+ ignore:.*Treatment probability within bandwidth left from cutoff higher than right from cutoff.\nTreatment assignment might be based on the wrong side of the cutoff.*:UserWarning