From 4969a4df9c5a0af3338a5a78bba3a459974ab35e Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 7 Jun 2024 17:18:19 +0200
Subject: [PATCH 01/38] add first elements for sensitivity in framework class

---
 doubleml/double_ml_framework.py | 34 +++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 9ae9fb59a..6e453d81b 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -47,6 +47,12 @@ def __init__(
         if "is_cluster_data" in doubleml_dict.keys():
             self._is_cluster_data = doubleml_dict['is_cluster_data']
 
+        self._sensitivity_implemented = False
+        self._sensitivity_elements = None
+        if "sensitivity_elements" in doubleml_dict.keys():
+            self._sensitivity_elements = doubleml_dict['sensitivity_elements']
+            self._sensitivity_implemented = True
+
         # check if all sizes match
         _check_framework_shapes(self)
         # initialize bootstrap distribution
@@ -170,6 +176,24 @@ def boot_t_stat(self):
         """
         return self._boot_t_stat
 
+    @property
+    def sensitivity_elements(self):
+        """
+        Values of the sensitivity components.
+        If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2``
+        and ``psi_nu2``.
+        """
+        return self._sensitivity_elements
+
+    @property
+    def sensitivity_params(self):
+        """
+        Values of the sensitivity parameters after calling :meth:`sensitivity_analysis`;
+        If available (e.g., PLR, IRM) a dictionary with entries ``theta``, ``se``, ``ci``, ``rv``
+        and ``rva``.
+        """
+        return self._sensitivity_params
+
     def __add__(self, other):
 
         if isinstance(other, DoubleMLFramework):
@@ -504,4 +528,14 @@
             raise ValueError(('The shape of scaled_psi does not match the expected '
                               f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).'))
 
+    if self._sensitivity_implemented:
+        if self._sensitivity_elements['sigma2'].shape != (self._n_thetas,):
+            raise ValueError(f'The shape of sigma2 does not match the expected shape ({self._n_thetas},).')
+        if self._sensitivity_elements['nu2'].shape != (self._n_thetas,):
+            raise ValueError(f'The shape of nu2 does not match the expected shape ({self._n_thetas},).')
+        if self._sensitivity_elements['psi_sigma2'].shape != (self._n_obs, self._n_thetas):
+            raise ValueError(f'The shape of psi_sigma2 does not match the expected shape ({self._n_obs}, {self._n_thetas}).')
+        if self._sensitivity_elements['psi_nu2'].shape != (self._n_obs, self._n_thetas):
+            raise ValueError(f'The shape of psi_nu2 does not match the expected shape ({self._n_obs}, {self._n_thetas}).')
+
     return None

From 3add93e9a70889af85becb135ecba761ece77138 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Sun, 16 Jun 2024 18:44:04 +0200
Subject: [PATCH 02/38] add sensitivity elements to framework

---
 doubleml/double_ml.py           | 25 ++++++++++++++++++-------
 doubleml/double_ml_framework.py | 32 ++++++++++++++++++++++----------
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 8ea773a41..a3c86091f 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -531,15 +531,26 @@ def construct_framework(self):
         scaled_psi_reshape = np.transpose(scaled_psi, (0, 2, 1))
 
         doubleml_dict = {
-            "thetas": self.coef,
-            "all_thetas": self.all_coef,
-            "ses": self.se,
-            "all_ses": self.all_se,
-            "var_scaling_factors": self._var_scaling_factors,
-            "scaled_psi": scaled_psi_reshape,
-
"is_cluster_data": self._is_cluster_data + 'thetas': self.coef, + 'all_thetas': self.all_coef, + 'ses': self.se, + 'all_ses': self.all_se, + 'var_scaling_factors': self._var_scaling_factors, + 'scaled_psi': scaled_psi_reshape, + 'is_cluster_data': self._is_cluster_data } + if self._sensitivity_implemented: + # reshape sensitivity elements to (n_obs, n_coefs, n_rep) + doubleml_dict.update({ + 'sensitivity_elements': { + 'sigma2': np.transpose(self.sensitivity_elements['sigma2'], (0, 2, 1)), + 'nu2': np.transpose(self.sensitivity_elements['nu2'], (0, 2, 1)), + 'psi_sigma2': np.transpose(self.sensitivity_elements['psi_sigma2'], (0, 2, 1)), + 'psi_nu2': np.transpose(self.sensitivity_elements['psi_nu2'], (0, 2, 1)) + } + }) + doubleml_framework = DoubleMLFramework(doubleml_dict) return doubleml_framework diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 6e453d81b..2688fe1e2 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -27,7 +27,8 @@ def __init__( self._is_cluster_data = False # check input - assert isinstance(doubleml_dict, dict), "doubleml_dict must be a dictionary." + if not isinstance(doubleml_dict, dict): + raise TypeError('doubleml_dict must be a dictionary.') expected_keys = ['thetas', 'ses', 'all_thetas', 'all_ses', 'var_scaling_factors', 'scaled_psi'] if not all(key in doubleml_dict.keys() for key in expected_keys): raise ValueError('The dict must contain the following keys: ' + ', '.join(expected_keys)) @@ -50,6 +51,14 @@ def __init__( self._sensitivity_implemented = False self._sensitivity_elements = None if "sensitivity_elements" in doubleml_dict.keys(): + # check input + if not isinstance(doubleml_dict['sensitivity_elements'], dict): + raise TypeError('sensitivity_elements must be a dictionary.') + expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] + if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): + raise ValueError('The sensitivity_elements dict must contain the following ' + 'keys: ' + ', '.join(expected_keys_sensitivity)) + self._sensitivity_elements = doubleml_dict['sensitivity_elements'] self._sensitivity_implemented = True @@ -512,6 +521,7 @@ def concat(objs): def _check_framework_shapes(self): + score_dim = (self._n_obs, self._n_thetas, self.n_rep) # check if all sizes match if self._thetas.shape != (self._n_thetas,): raise ValueError(f'The shape of thetas does not match the expected shape ({self._n_thetas},).') @@ -524,18 +534,20 @@ def _check_framework_shapes(self): if self._var_scaling_factors.shape != (self._n_thetas,): raise ValueError(f'The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).') # dimension of scaled_psi is n_obs x n_thetas x n_rep (per default) - if self._scaled_psi.shape != (self._n_obs, self._n_thetas, self._n_rep): + if self._scaled_psi.shape != score_dim: raise ValueError(('The shape of scaled_psi does not match the expected ' f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) if self._sensitivity_implemented: - if self._sensitivity_elements['sigma2'].shape != (self._n_thetas,): - raise ValueError(f'The shape of sigma2 does not match the expected shape ({self._n_thetas},).') - if self._sensitivity_elements['nu2'].shape != (self._n_thetas,): - raise ValueError(f'The shape of nu2 does not match the expected shape ({self._n_thetas},).') - if self._sensitivity_elements['psi_sigma2'].shape != (self._n_obs, self._n_thetas): - raise ValueError(f'The shape of psi_sigma2 
does not match the expected shape ({self._n_obs}, {self._n_thetas}).') - if self._sensitivity_elements['psi_nu2'].shape != (self._n_obs, self._n_thetas): - raise ValueError(f'The shape of psi_nu2 does not match the expected shape ({self._n_obs}, {self._n_thetas}).') + if self._sensitivity_elements['sigma2'].shape != (1, self._n_thetas, self.n_rep): + raise ValueError(f'The shape of sigma2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).') + if self._sensitivity_elements['nu2'].shape != (1, self._n_thetas, self.n_rep): + raise ValueError(f'The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).') + if self._sensitivity_elements['psi_sigma2'].shape != score_dim: + raise ValueError(('The shape of psi_sigma2 does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + if self._sensitivity_elements['psi_nu2'].shape != score_dim: + raise ValueError(('The shape of psi_nu2 does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) return None From d95bdef18cfae9e1fe1e79202390a5ef77b04bfb Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Sun, 16 Jun 2024 18:44:13 +0200 Subject: [PATCH 03/38] extend exception tests --- doubleml/tests/test_framework.py | 44 ++++++++++++++++----- doubleml/tests/test_framework_exceptions.py | 21 +++++++++- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 761a65c5e..6fdd62aec 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -163,7 +163,7 @@ def dml_framework_from_doubleml_fixture(n_rep): ml_g = LinearRegression() ml_m = LogisticRegression() - dml_irm_obj = DoubleMLIRM(dml_data, ml_g, ml_m) + dml_irm_obj = DoubleMLIRM(dml_data, ml_g, ml_m, n_rep=n_rep) dml_irm_obj.fit() dml_framework_obj = dml_irm_obj.construct_framework() @@ -179,7 +179,7 @@ def dml_framework_from_doubleml_fixture(n_rep): # substract objects dml_data_2 = make_irm_data() - dml_irm_obj_2 = DoubleMLIRM(dml_data_2, ml_g, ml_m) + dml_irm_obj_2 = DoubleMLIRM(dml_data_2, ml_g, ml_m, n_rep=n_rep) dml_irm_obj_2.fit() dml_framework_obj_2 = dml_irm_obj_2.construct_framework() @@ -218,6 +218,7 @@ def dml_framework_from_doubleml_fixture(n_rep): 'ci_joint_sub_obj': ci_joint_sub_obj, 'ci_joint_mul_obj': ci_joint_mul_obj, 'ci_joint_concat': ci_joint_concat, + 'n_rep': n_rep, } return result_dict @@ -257,14 +258,19 @@ def test_dml_framework_from_doubleml_se(dml_framework_from_doubleml_fixture): dml_framework_from_doubleml_fixture['dml_framework_obj_add_obj'].all_ses, 2*dml_framework_from_doubleml_fixture['dml_obj'].all_se ) - scaling = np.array([dml_framework_from_doubleml_fixture['dml_obj']._var_scaling_factors]).reshape(-1, 1) - sub_var = np.mean( - np.square(dml_framework_from_doubleml_fixture['dml_obj'].psi - dml_framework_from_doubleml_fixture['dml_obj_2'].psi), - axis=0) - assert np.allclose( - dml_framework_from_doubleml_fixture['dml_framework_obj_sub_obj'].all_ses, - np.sqrt(sub_var / scaling) - ) + + if dml_framework_from_doubleml_fixture['n_rep'] == 1: + # formula only valid for n_rep = 1 + scaling = np.array([dml_framework_from_doubleml_fixture['dml_obj']._var_scaling_factors]).reshape(-1, 1) + sub_var = np.mean( + np.square(dml_framework_from_doubleml_fixture['dml_obj'].psi + - dml_framework_from_doubleml_fixture['dml_obj_2'].psi), + axis=0) + assert np.allclose( + 
dml_framework_from_doubleml_fixture['dml_framework_obj_sub_obj'].all_ses, + np.sqrt(sub_var / scaling) + ) + assert np.allclose( dml_framework_from_doubleml_fixture['dml_framework_obj_mul_obj'].all_ses, 2*dml_framework_from_doubleml_fixture['dml_obj'].all_se @@ -288,3 +294,21 @@ def test_dml_framework_from_doubleml_ci(dml_framework_from_doubleml_fixture): assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_mul_obj'], pd.DataFrame) assert isinstance(dml_framework_from_doubleml_fixture['ci_concat'], pd.DataFrame) assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_concat'], pd.DataFrame) + + +@pytest.mark.ci +def test_dml_framework_sensitivity(dml_framework_from_doubleml_fixture): + n_rep = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_rep + n_thetas = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_thetas + n_obs = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_obs + + assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_implemented + + assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['sigma2'].shape == \ + (1, n_thetas, n_rep) + assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['nu2'].shape == \ + (1, n_thetas, n_rep) + assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['psi_sigma2'].shape == \ + (n_obs, n_thetas, n_rep) + assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['psi_nu2'].shape == \ + (n_obs, n_thetas, n_rep) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index c061e1a9b..e2f4cc85d 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -61,9 +61,28 @@ def test_input_exceptions(): DoubleMLFramework(test_dict) msg = "doubleml_dict must be a dictionary." - with pytest.raises(AssertionError, match=msg): + with pytest.raises(TypeError, match=msg): DoubleMLFramework(1.0) + msg = "sensitivity_elements must be a dictionary." + with pytest.raises(TypeError, match=msg): + test_dict = doubleml_dict.copy() + test_dict['sensitivity_elements'] = 1 + DoubleMLFramework(test_dict) + + msg = 'The sensitivity_elements dict must contain the following keys: sigma2, nu2, psi_sigma2, psi_nu2' + with pytest.raises(ValueError, match=msg): + test_dict = doubleml_dict.copy() + test_dict['sensitivity_elements'] = {'sensitivities': np.ones(shape=(n_obs, n_thetas, n_rep))} + DoubleMLFramework(test_dict) + + msg = 'The shape of sigma2 does not match the expected shape \(10, 5\)\.' 
+ with pytest.raises(ValueError, match=msg): + test_dict = doubleml_dict.copy() + test_dict['sensitivity_elements'] = { + 'sigma2': np.ones(shape=(n_obs, n_rep))} + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition From b308d6e8b20882271bb692ae9df6a8e5e86cf9fa Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 08:36:14 +0200 Subject: [PATCH 04/38] add tests for shapes --- doubleml/tests/test_framework_exceptions.py | 34 ++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index e2f4cc85d..32899e805 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -1,5 +1,6 @@ import pytest import numpy as np +import copy from doubleml.double_ml_framework import DoubleMLFramework, concat from ._utils import generate_dml_dict @@ -12,6 +13,14 @@ psi_a = np.ones(shape=(n_obs, n_thetas, n_rep)) psi_b = np.random.normal(size=(n_obs, n_thetas, n_rep)) doubleml_dict = generate_dml_dict(psi_a, psi_b) +# add sensitivity elements +doubleml_dict['sensitivity_elements'] = { + 'sigma2': np.ones(shape=(1, n_thetas, n_rep)), + 'nu2': np.ones(shape=(1, n_thetas, n_rep)), + 'psi_sigma2': np.ones(shape=(n_obs, n_thetas, n_rep)), + 'psi_nu2': np.ones(shape=(n_obs, n_thetas, n_rep)), +} + # combine objects and estimate parameters dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) @@ -76,11 +85,28 @@ def test_input_exceptions(): test_dict['sensitivity_elements'] = {'sensitivities': np.ones(shape=(n_obs, n_thetas, n_rep))} DoubleMLFramework(test_dict) - msg = 'The shape of sigma2 does not match the expected shape \(10, 5\)\.' + msg = r'The shape of sigma2 does not match the expected shape \(1, 2, 5\)\.' with pytest.raises(ValueError, match=msg): - test_dict = doubleml_dict.copy() - test_dict['sensitivity_elements'] = { - 'sigma2': np.ones(shape=(n_obs, n_rep))} + test_dict = copy.deepcopy(doubleml_dict) + test_dict['sensitivity_elements']['sigma2'] = np.ones(shape=(n_obs, n_rep)) + DoubleMLFramework(test_dict) + + msg = r'The shape of nu2 does not match the expected shape \(1, 2, 5\)\.' + with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['sensitivity_elements']['nu2'] = np.ones(shape=(n_obs, n_rep)) + DoubleMLFramework(test_dict) + + msg = r'The shape of psi_sigma2 does not match the expected shape \(10, 2, 5\)\.' + with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['sensitivity_elements']['psi_sigma2'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) + DoubleMLFramework(test_dict) + + msg = r'The shape of psi_nu2 does not match the expected shape \(10, 2, 5\)\.' 
+ with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['sensitivity_elements']['psi_nu2'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) DoubleMLFramework(test_dict) From ebc8b39b5cbe3d6bbdc0dc151e099eb0bb08650d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 12:55:34 +0200 Subject: [PATCH 05/38] add riesz representer to sensitivity_elements --- doubleml/did/did.py | 4 +++- doubleml/did/did_cs.py | 4 +++- doubleml/double_ml.py | 9 +++++---- doubleml/double_ml_framework.py | 5 +++-- doubleml/irm/irm.py | 4 +++- doubleml/plm/plr.py | 10 +++++++--- doubleml/tests/test_exceptions.py | 2 +- 7 files changed, 25 insertions(+), 13 deletions(-) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 1fd85f46d..7d9bc7e75 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -339,7 +339,9 @@ def _sensitivity_element_est(self, preds): element_dict = {'sigma2': sigma2, 'nu2': nu2, 'psi_sigma2': psi_sigma2, - 'psi_nu2': psi_nu2} + 'psi_nu2': psi_nu2, + 'riesz_rep': rr, + } return element_dict def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 4b84fb1a4..4e34a2b92 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -471,7 +471,9 @@ def _sensitivity_element_est(self, preds): element_dict = {'sigma2': sigma2, 'nu2': nu2, 'psi_sigma2': psi_sigma2, - 'psi_nu2': psi_nu2} + 'psi_nu2': psi_nu2, + 'riesz_rep': rr, + } return element_dict def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index a3c86091f..0ee3d5d30 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -332,8 +332,8 @@ def psi_elements(self): def sensitivity_elements(self): """ Values of the sensitivity components after calling :meth:`fit`; - If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2`` - and ``psi_nu2``. + If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2``, ``psi_nu2`` + and ``riesz_rep``. """ return self._sensitivity_elements @@ -1368,14 +1368,15 @@ def _sensitivity_element_est(self, preds): @property def _sensitivity_element_names(self): - return ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] + return ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] # the dimensions will usually be (n_obs, n_rep, n_coefs) to be equal to the score dimensions psi def _initialize_sensitivity_elements(self, score_dim): sensitivity_elements = {'sigma2': np.full((1, score_dim[1], score_dim[2]), np.nan), 'nu2': np.full((1, score_dim[1], score_dim[2]), np.nan), 'psi_sigma2': np.full(score_dim, np.nan), - 'psi_nu2': np.full(score_dim, np.nan)} + 'psi_nu2': np.full(score_dim, np.nan), + 'riesz_rep': np.full(score_dim, np.nan)} return sensitivity_elements def _get_sensitivity_elements(self, i_rep, i_treat): diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 2688fe1e2..9124a2205 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -189,8 +189,8 @@ def boot_t_stat(self): def sensitivity_elements(self): """ Values of the sensitivity components. - If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2`` - and ``psi_nu2``. 
+ If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2``, ``psi_nu2`` + and ``riesz_rep``. """ return self._sensitivity_elements @@ -226,6 +226,7 @@ def __add__(self, other): thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors) is_cluster_data = self._is_cluster_data or other._is_cluster_data + doubleml_dict = { 'thetas': thetas, 'ses': ses, diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 5dd2c1b08..82d22d1dd 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -400,7 +400,9 @@ def _sensitivity_element_est(self, preds): element_dict = {'sigma2': sigma2, 'nu2': nu2, 'psi_sigma2': psi_sigma2, - 'psi_nu2': psi_nu2} + 'psi_nu2': psi_nu2, + 'riesz_rep': rr, + } return element_dict def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index ab758c90c..3b7d90f5b 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -279,13 +279,17 @@ def _sensitivity_element_est(self, preds): sigma2 = np.mean(sigma2_score_element) psi_sigma2 = sigma2_score_element - sigma2 - nu2 = np.divide(1.0, np.mean(np.square(d - m_hat))) - psi_nu2 = nu2 - np.multiply(np.square(d-m_hat), np.square(nu2)) + treatment_residual = d - m_hat + nu2 = np.divide(1.0, np.mean(np.square(treatment_residual))) + psi_nu2 = nu2 - np.multiply(np.square(treatment_residual), np.square(nu2)) + rr = np.multiply(treatment_residual, nu2) element_dict = {'sigma2': sigma2, 'nu2': nu2, 'psi_sigma2': psi_sigma2, - 'psi_nu2': psi_nu2} + 'psi_nu2': psi_nu2, + 'riesz_rep': rr, + } return element_dict def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 84ac55a14..57ebba8f3 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1106,7 +1106,7 @@ def test_doubleml_sensitivity_inputs(): _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) # test variances - sensitivity_elements = dict({'sigma2': 1.0, 'nu2': -2.4, 'psi_sigma2': 1.0, 'psi_nu2': 1.0}) + sensitivity_elements = dict({'sigma2': 1.0, 'nu2': -2.4, 'psi_sigma2': 1.0, 'psi_nu2': 1.0, 'riesz_rep': 1.0}) _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) msg = ('sensitivity_elements sigma2 and nu2 have to be positive. ' r'Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. ' From 33b68b31529e324b7571700942638b38e8bd03d8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:52:48 +0200 Subject: [PATCH 06/38] add sensitivity calculation for framework operations --- doubleml/double_ml_framework.py | 54 +++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 9124a2205..1f79cc7be 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -236,8 +236,25 @@ def __add__(self, other): 'scaled_psi': scaled_psi, 'is_cluster_data': is_cluster_data, } - new_obj = DoubleMLFramework(doubleml_dict) + # sensitivity combination only available for same outcome and cond. expectation (e.g. 
IRM) + if self._sensitivity_implemented and other._sensitivity_implemented: + nu2_score_element = self._sensitivity_elements['psi_nu2'] + other._sensitivity_elements['psi_nu2'] - \ + np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'], + self._sensitivity_elements['riesz_rep'])) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 + + sensitivity_elements = { + 'sigma2': self._sensitivity_elements['sigma2'], + 'nu2': nu2, + 'psi_sigma2': self._sensitivity_elements['psi_sigma2'], + 'psi_nu2': psi_nu2, + 'riesz_rep': self._sensitivity_elements['riesz_rep'] + other._sensitivity_elements['riesz_rep'], + } + doubleml_dict['sensitivity_elements'] = sensitivity_elements + + new_obj = DoubleMLFramework(doubleml_dict) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -278,8 +295,25 @@ def __sub__(self, other): 'scaled_psi': scaled_psi, 'is_cluster_data': is_cluster_data, } - new_obj = DoubleMLFramework(doubleml_dict) + # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM) + if self._sensitivity_implemented and other._sensitivity_implemented: + nu2_score_element = self._sensitivity_elements['psi_nu2'] - other._sensitivity_elements['psi_nu2'] + \ + np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'], + self._sensitivity_elements['riesz_rep'])) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 + + sensitivity_elements = { + 'sigma2': self._sensitivity_elements['sigma2'], + 'nu2': nu2, + 'psi_sigma2': self._sensitivity_elements['psi_sigma2'], + 'psi_nu2': psi_nu2, + 'riesz_rep': self._sensitivity_elements['riesz_rep'] - other._sensitivity_elements['riesz_rep'], + } + doubleml_dict['sensitivity_elements'] = sensitivity_elements + + new_obj = DoubleMLFramework(doubleml_dict) else: raise TypeError(f"Unsupported operand type: {type(other)}") @@ -309,6 +343,22 @@ def __mul__(self, other): 'scaled_psi': scaled_psi, 'is_cluster_data': is_cluster_data, } + + # sensitivity combination only available for linear models + if self._sensitivity_implemented: + nu2_score_element = np.multiply(np.square(other), self._sensitivity_elements['psi_nu2']) + nu2 = np.mean(nu2_score_element) + psi_nu2 = nu2_score_element - nu2 + + sensitivity_elements = { + 'sigma2': self._sensitivity_elements['sigma2'], + 'nu2': nu2, + 'psi_sigma2': self._sensitivity_elements['psi_sigma2'], + 'psi_nu2': psi_nu2, + 'riesz_rep': np.multiply(other, self._sensitivity_elements['riesz_rep']), + } + doubleml_dict['sensitivity_elements'] = sensitivity_elements + new_obj = DoubleMLFramework(doubleml_dict) else: raise TypeError(f"Unsupported operand type: {type(other)}") From e6db5b4bdd29213a521ff080e5dc34b9563bac31 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 15:22:20 +0200 Subject: [PATCH 07/38] add riesz_rep to framework --- doubleml/double_ml.py | 3 ++- doubleml/double_ml_framework.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 0ee3d5d30..f1f383b1b 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -547,7 +547,8 @@ def construct_framework(self): 'sigma2': np.transpose(self.sensitivity_elements['sigma2'], (0, 2, 1)), 'nu2': np.transpose(self.sensitivity_elements['nu2'], (0, 2, 1)), 'psi_sigma2': np.transpose(self.sensitivity_elements['psi_sigma2'], (0, 2, 1)), - 'psi_nu2': np.transpose(self.sensitivity_elements['psi_nu2'], (0, 2, 1)) + 'psi_nu2': 
np.transpose(self.sensitivity_elements['psi_nu2'], (0, 2, 1)), + 'riesz_rep': np.transpose(self.sensitivity_elements['riesz_rep'], (0, 2, 1)) } }) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 1f79cc7be..68ddce967 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -54,7 +54,7 @@ def __init__( # check input if not isinstance(doubleml_dict['sensitivity_elements'], dict): raise TypeError('sensitivity_elements must be a dictionary.') - expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2'] + expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): raise ValueError('The sensitivity_elements dict must contain the following ' 'keys: ' + ', '.join(expected_keys_sensitivity)) @@ -600,5 +600,8 @@ def _check_framework_shapes(self): if self._sensitivity_elements['psi_nu2'].shape != score_dim: raise ValueError(('The shape of psi_nu2 does not match the expected ' f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + if self._sensitivity_elements['riesz_rep'].shape != score_dim: + raise ValueError(('The shape of riesz_rep does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) return None From 40d8a6f5ee325f004199369a78cc5754f1098910 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:09:32 +0200 Subject: [PATCH 08/38] add returntype tests for senstivity_elements --- doubleml/tests/test_return_types.py | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 1d03439a4..975c203e7 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -397,7 +397,20 @@ def test_rmses(): @pytest.mark.ci def test_sensitivity(): + + var_keys = ['sigma2', 'nu2'] + score_keys = ['psi_sigma2', 'psi_nu2', 'riesz_rep'] benchmarks = {'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': ["test1", "test2"]} + + # PLR + assert isinstance(plr_obj.sensitivity_elements, dict) + for key in var_keys: + assert isinstance(plr_obj.sensitivity_elements[key], np.ndarray) + assert plr_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) + for key in score_keys: + assert isinstance(plr_obj.sensitivity_elements[key], np.ndarray) + assert plr_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) + assert isinstance(plr_obj.sensitivity_summary, str) plr_obj.sensitivity_analysis() assert isinstance(plr_obj.sensitivity_summary, str) @@ -408,6 +421,15 @@ def test_sensitivity(): plr_benchmark = plr_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(plr_benchmark, pd.DataFrame) + # DID + assert isinstance(irm_obj.sensitivity_elements, dict) + for key in var_keys: + assert isinstance(irm_obj.sensitivity_elements[key], np.ndarray) + assert irm_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) + for key in score_keys: + assert isinstance(irm_obj.sensitivity_elements[key], np.ndarray) + assert irm_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) + assert isinstance(irm_obj.sensitivity_summary, str) irm_obj.sensitivity_analysis() assert isinstance(irm_obj.sensitivity_summary, str) @@ -418,6 +440,15 @@ def test_sensitivity(): irm_benchmark = irm_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(irm_benchmark, pd.DataFrame) 
+ # DID + assert isinstance(did_obj.sensitivity_elements, dict) + for key in var_keys: + assert isinstance(did_obj.sensitivity_elements[key], np.ndarray) + assert did_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) + for key in score_keys: + assert isinstance(did_obj.sensitivity_elements[key], np.ndarray) + assert did_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) + assert isinstance(did_obj.sensitivity_summary, str) did_obj.sensitivity_analysis() assert isinstance(did_obj.sensitivity_summary, str) @@ -428,6 +459,15 @@ def test_sensitivity(): did_benchmark = did_obj.sensitivity_benchmark(benchmarking_set=['Z1']) assert isinstance(did_benchmark, pd.DataFrame) + # DIDCS + assert isinstance(did_cs_obj.sensitivity_elements, dict) + for key in var_keys: + assert isinstance(did_cs_obj.sensitivity_elements[key], np.ndarray) + assert did_cs_obj.sensitivity_elements[key].shape == (1, n_rep, n_treat) + for key in score_keys: + assert isinstance(did_cs_obj.sensitivity_elements[key], np.ndarray) + assert did_cs_obj.sensitivity_elements[key].shape == (n_obs, n_rep, n_treat) + assert isinstance(did_cs_obj.sensitivity_summary, str) did_cs_obj.sensitivity_analysis() assert isinstance(did_cs_obj.sensitivity_summary, str) From 26d65a9642e7f628871a3ac2990ab543049e9540 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:09:47 +0200 Subject: [PATCH 09/38] fix nu2 dimension --- doubleml/double_ml_framework.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 68ddce967..d50475da4 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -242,7 +242,7 @@ def __add__(self, other): nu2_score_element = self._sensitivity_elements['psi_nu2'] + other._sensitivity_elements['psi_nu2'] - \ np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'], self._sensitivity_elements['riesz_rep'])) - nu2 = np.mean(nu2_score_element) + nu2 = np.mean(nu2_score_element, axis=0, keepdims=True) psi_nu2 = nu2_score_element - nu2 sensitivity_elements = { @@ -301,7 +301,7 @@ def __sub__(self, other): nu2_score_element = self._sensitivity_elements['psi_nu2'] - other._sensitivity_elements['psi_nu2'] + \ np.multiply(2.0, np.multiply(self._sensitivity_elements['riesz_rep'], self._sensitivity_elements['riesz_rep'])) - nu2 = np.mean(nu2_score_element) + nu2 = np.mean(nu2_score_element, axis=0, keepdims=True) psi_nu2 = nu2_score_element - nu2 sensitivity_elements = { @@ -347,7 +347,7 @@ def __mul__(self, other): # sensitivity combination only available for linear models if self._sensitivity_implemented: nu2_score_element = np.multiply(np.square(other), self._sensitivity_elements['psi_nu2']) - nu2 = np.mean(nu2_score_element) + nu2 = np.mean(nu2_score_element, axis=0, keepdims=True) psi_nu2 = nu2_score_element - nu2 sensitivity_elements = { From 225434642211b31e4d342793ef51098fbdd7129c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:10:28 +0200 Subject: [PATCH 10/38] add exception tests for riesz rep in framework --- doubleml/tests/test_framework_exceptions.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 32899e805..a47d12a06 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -19,6 +19,7 @@ 
'nu2': np.ones(shape=(1, n_thetas, n_rep)), 'psi_sigma2': np.ones(shape=(n_obs, n_thetas, n_rep)), 'psi_nu2': np.ones(shape=(n_obs, n_thetas, n_rep)), + 'riesz_rep': np.ones(shape=(n_obs, n_thetas, n_rep)) } @@ -109,6 +110,12 @@ def test_input_exceptions(): test_dict['sensitivity_elements']['psi_nu2'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) DoubleMLFramework(test_dict) + msg = r'The shape of riesz_rep does not match the expected shape \(10, 2, 5\)\.' + with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['sensitivity_elements']['riesz_rep'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition From 6ef2c96932205e6bea50554653aa3bbbdfcddc29 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:30:30 +0200 Subject: [PATCH 11/38] extend sensitivity framework tests --- doubleml/tests/test_framework.py | 33 ++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 6fdd62aec..9d3affba2 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -302,13 +302,26 @@ def test_dml_framework_sensitivity(dml_framework_from_doubleml_fixture): n_thetas = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_thetas n_obs = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_obs - assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_implemented - - assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['sigma2'].shape == \ - (1, n_thetas, n_rep) - assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['nu2'].shape == \ - (1, n_thetas, n_rep) - assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['psi_sigma2'].shape == \ - (n_obs, n_thetas, n_rep) - assert dml_framework_from_doubleml_fixture['dml_framework_obj']._sensitivity_elements['psi_nu2'].shape == \ - (n_obs, n_thetas, n_rep) + object_list = ['dml_framework_obj', + 'dml_framework_obj_add_obj', + 'dml_framework_obj_sub_obj', + 'dml_framework_obj_mul_obj'] + var_keys = ['sigma2', 'nu2'] + score_keys = ['psi_sigma2', 'psi_nu2', 'riesz_rep'] + + for obj in object_list: + assert dml_framework_from_doubleml_fixture[obj]._sensitivity_implemented + for key in var_keys: + assert dml_framework_from_doubleml_fixture[obj]._sensitivity_elements[key].shape == \ + (1, n_thetas, n_rep) + for key in score_keys: + assert dml_framework_from_doubleml_fixture[obj]._sensitivity_elements[key].shape == \ + (n_obs, n_thetas, n_rep) + + # separate test for concat + for key in var_keys: + assert dml_framework_from_doubleml_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ + (1, 2, n_rep) + for key in score_keys: + assert dml_framework_from_doubleml_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ + (n_obs, 2, n_rep) From 2443ccdd1cd294c381e3b6af1abfa5d1ab195c5e Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:30:46 +0200 Subject: [PATCH 12/38] add senstivity_framework to concat --- doubleml/double_ml_framework.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index d50475da4..2d63e869a 100644 --- a/doubleml/double_ml_framework.py +++ 
b/doubleml/double_ml_framework.py @@ -563,6 +563,15 @@ def concat(objs): 'scaled_psi': scaled_psi, 'is_cluster_data': is_cluster_data, } + + if all(obj._sensitivity_implemented for obj in objs): + sensitivity_elements = {} + for key in ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep']: + assert all(key in obj._sensitivity_elements.keys() for obj in objs) + sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1) + + doubleml_dict['sensitivity_elements'] = sensitivity_elements + new_obj = DoubleMLFramework(doubleml_dict) # check internal consistency of new object From 747e9cde56dcf61ca768c4dfa99698d332f40eb5 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:48:07 +0200 Subject: [PATCH 13/38] add first _calc_sensitivity_analysis() --- doubleml/double_ml_framework.py | 4 ++++ doubleml/tests/test_framework.py | 1 + doubleml/tests/test_framework_exceptions.py | 8 ++++++++ 3 files changed, 13 insertions(+) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 2d63e869a..835339aa8 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -368,6 +368,10 @@ def __mul__(self, other): def __rmul__(self, other): return self.__mul__(other) + def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): + if not self._sensitivity_implemented: + raise NotImplementedError('Sensitivity analysis is not implemented for this model.') + def confint(self, joint=False, level=0.95): """ Confidence intervals for DoubleML models. diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 9d3affba2..500965a86 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -170,6 +170,7 @@ def dml_framework_from_doubleml_fixture(n_rep): ci = dml_framework_obj.confint(joint=False, level=0.95) dml_framework_obj.bootstrap(method='normal') ci_joint = dml_framework_obj.confint(joint=True, level=0.95) + dml_framework_obj._calc_sensitivity_analysis() # add objects dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index a47d12a06..78bc4e392 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -213,3 +213,11 @@ def test_p_adjust_exceptions(): msg = r'Apply bootstrap\(\) before p_adjust\("rw"\)\.' with pytest.raises(ValueError, match=msg): _ = dml_framework_obj_1.p_adjust(method='rw') + + +@pytest.mark.ci +def test_sensitivity_exceptions(): + dml_framework_no_sensitivity = DoubleMLFramework(generate_dml_dict(psi_a, psi_b)) + msg = 'Sensitivity analysis is not implemented for this model.' 
+ with pytest.raises(NotImplementedError, match=msg): + _ = dml_framework_no_sensitivity._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.0, level=0.95) From 92b869906a85bee8f243bf334246a11258f0d8af Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:01:47 +0200 Subject: [PATCH 14/38] add basic sensitivity calculations --- doubleml/double_ml_framework.py | 59 +++++++++++++- doubleml/tests/test_framework_exceptions.py | 90 +++++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 835339aa8..8ad0e68ee 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -5,7 +5,8 @@ from statsmodels.stats.multitest import multipletests from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses -from .utils._checks import _check_bootstrap, _check_framework_compatibility +from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \ + _check_float, _check_integer class DoubleMLFramework(): @@ -372,6 +373,62 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): if not self._sensitivity_implemented: raise NotImplementedError('Sensitivity analysis is not implemented for this model.') + # input checks + _check_in_zero_one(cf_y, 'cf_y', include_one=False) + _check_in_zero_one(cf_d, 'cf_d', include_one=False) + if not isinstance(rho, float): + raise TypeError(f'rho must be of float type. ' + f'{str(rho)} of type {str(type(rho))} was passed.') + _check_in_zero_one(abs(rho), 'The absolute value of rho') + _check_in_zero_one(level, 'The confidence level', include_zero=False, include_one=False) + + def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): + _check_float(null_hypothesis, "null_hypothesis") + _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1) + + cf_y = 0.03 + cf_d = 0.03 + sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) + + def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_hypothesis=0.0): + """ + Performs a sensitivity analysis to account for unobserved confounders. + + The evaluated scenario is stored as a dictionary in the property ``sensitivity_params``. + + Parameters + ---------- + cf_y : float + Percentage of the residual variation of the outcome explained by latent/confounding variables. + Default is ``0.03``. + + cf_d : float + Percentage gains in the variation of the Riesz representer generated by latent/confounding variables. + Default is ``0.03``. + + rho : float + The correlation between the differences in short and long representations in the main regression and + Riesz representer. Has to be in [-1,1]. The absolute value determines the adversarial strength of the + confounding (maximizes at 1.0). + Default is ``1.0``. + + level : float + The confidence level. + Default is ``0.95``. + + null_hypothesis : float or numpy.ndarray + Null hypothesis for the effect. Determines the robustness values. + If it is a single float uses the same null hypothesis for all estimated parameters. + Else the array has to be of shape (n_thetas,). + Default is ``0.0``. 
+ + Returns + ------- + self : object + """ + # compute sensitivity analysis + sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) + def confint(self, joint=False, level=0.95): """ Confidence intervals for DoubleML models. diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 78bc4e392..4a29125fc 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -221,3 +221,93 @@ def test_sensitivity_exceptions(): msg = 'Sensitivity analysis is not implemented for this model.' with pytest.raises(NotImplementedError, match=msg): _ = dml_framework_no_sensitivity._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.1, rho=1.0, level=0.95) + + # test cf_y + msg = "cf_y must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=1) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=1, cf_d=0.03, rho=1.0, level=0.95) + + msg = r'cf_y must be in \[0,1\). 1.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=1.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=1.0, cf_d=0.03, rho=1.0, level=0.95) + + # test cf_d + msg = "cf_d must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=1) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=1, rho=1.0, level=0.95) + + msg = r'cf_d must be in \[0,1\). 1.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=1.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=1.0, rho=1.0, level=0.95) + + # test rho + msg = "rho must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1, level=0.95) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho=1, null_hypothesis=0.0, level=0.95, idx_treatment=0) + + msg = "rho must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho="1") + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho="1", level=0.95) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho="1", null_hypothesis=0.0, level=0.95, idx_treatment=0) + + msg = r'The absolute value of rho must be in \[0,1\]. 1.1 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1, level=0.95) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho=1.1, null_hypothesis=0.0, level=0.95, idx_treatment=0) + + # test level + msg = "The confidence level must be of float type. 1 of type was passed." 
+ with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=1, null_hypothesis=0.0, idx_treatment=0) + + msg = r'The confidence level must be in \(0,1\). 1.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=1.0, null_hypothesis=0.0, idx_treatment=0) + + msg = r'The confidence level must be in \(0,1\). 0.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=0.0, null_hypothesis=0.0, idx_treatment=0) + + # test null_hypothesis + msg = "null_hypothesis has to be of type float or np.ndarry. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=1) + msg = r"null_hypothesis is numpy.ndarray but does not have the required shape \(1,\). Array of shape \(2,\) was passed." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=np.array([1, 2])) + msg = "null_hypothesis must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=1, level=0.95, rho=1.0, idx_treatment=0) + msg = r"null_hypothesis must be of float type. \[1\] of type was passed." 
+ with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0) From 6c19e7dfe90d28772e0e2120946cef25dfa676e7 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:53:12 +0200 Subject: [PATCH 15/38] update sensitivity_initialisation --- doubleml/double_ml_framework.py | 58 ++++++++++++++++----- doubleml/tests/test_framework_exceptions.py | 9 ++++ 2 files changed, 54 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 8ad0e68ee..da48ba2c2 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -49,19 +49,8 @@ def __init__( if "is_cluster_data" in doubleml_dict.keys(): self._is_cluster_data = doubleml_dict['is_cluster_data'] - self._sensitivity_implemented = False - self._sensitivity_elements = None - if "sensitivity_elements" in doubleml_dict.keys(): - # check input - if not isinstance(doubleml_dict['sensitivity_elements'], dict): - raise TypeError('sensitivity_elements must be a dictionary.') - expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] - if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): - raise ValueError('The sensitivity_elements dict must contain the following ' - 'keys: ' + ', '.join(expected_keys_sensitivity)) - - self._sensitivity_elements = doubleml_dict['sensitivity_elements'] - self._sensitivity_implemented = True + # initialize sensitivity analysis + self._sensitivity_implemented, self._sensitivity_elements = self._check_and_set_sensitivity_elements(doubleml_dict) # check if all sizes match _check_framework_shapes(self) @@ -382,6 +371,19 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): _check_in_zero_one(abs(rho), 'The absolute value of rho') _check_in_zero_one(level, 'The confidence level', include_zero=False, include_one=False) + # set elements for readability + sigma2 = self.sensitivity_elements['sigma2'] + nu2 = self.sensitivity_elements['nu2'] + psi_sigma = self.sensitivity_elements['psi_sigma2'] + psi_nu = self.sensitivity_elements['psi_nu2'] + psi_scaled = self._scaled_psi + + if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)): + raise ValueError('sensitivity_elements sigma2 and nu2 have to be positive. ' + f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. 
" + 'Most likely this is due to low quality learners (especially propensity scores).') + + def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, "null_hypothesis") _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1) @@ -641,6 +643,36 @@ def concat(objs): return new_obj +def _check_and_set_sensitivity_elements(self, doubleml_dict): + if not ("senstivity_elements" in doubleml_dict.keys()): + sensitivity_implemented = False + sensitivity_elements = None + + else: + if not isinstance(doubleml_dict['sensitivity_elements'], dict): + raise TypeError('sensitivity_elements must be a dictionary.') + expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] + if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): + raise ValueError('The sensitivity_elements dict must contain the following ' + 'keys: ' + ', '.join(expected_keys_sensitivity)) + + for key in expected_keys_sensitivity: + if not isinstance(doubleml_dict['sensitivity_elements'][key], np.ndarray): + raise TypeError(f'The sensitivity element {key} must be a numpy array.') + + # set sensitivity elements + sensitivity_implemented = True + sensitivity_elements = { + 'sigma2': doubleml_dict['sensitivity_elements']['sigma2'], + 'nu2': doubleml_dict['sensitivity_elements']['nu2'], + 'psi_sigma2': doubleml_dict['sensitivity_elements']['psi_sigma2'], + 'psi_nu2': doubleml_dict['sensitivity_elements']['psi_nu2'], + 'riesz_rep': doubleml_dict['sensitivity_elements']['riesz_rep'], + } + + return sensitivity_implemented, sensitivity_elements + + def _check_framework_shapes(self): score_dim = (self._n_obs, self._n_thetas, self.n_rep) # check if all sizes match diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 4a29125fc..547dcb8c6 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -311,3 +311,12 @@ def test_sensitivity_exceptions(): msg = r"null_hypothesis must be of float type. \[1\] of type was passed." with pytest.raises(TypeError, match=msg): _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0) + + # test variances + sensitivity_elements = dict({'sigma2': 1.0, 'nu2': -2.4, 'psi_sigma2': 1.0, 'psi_nu2': 1.0, 'riesz_rep': 1.0}) + _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) + msg = ('sensitivity_elements sigma2 and nu2 have to be positive. ' + r'Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. 
' + r'Most likely this is due to low quality learners \(especially propensity scores\).') + with pytest.raises(ValueError, match=msg): + dml_irm.sensitivity_analysis() \ No newline at end of file From a659b67f345ef8d0766f502020daf75c11f54513 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:20:00 +0200 Subject: [PATCH 16/38] add variance check --- doubleml/double_ml_framework.py | 147 ++++++++++---------- doubleml/tests/test_framework_exceptions.py | 32 +++-- 2 files changed, 92 insertions(+), 87 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index da48ba2c2..439aa8c74 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -53,7 +53,7 @@ def __init__( self._sensitivity_implemented, self._sensitivity_elements = self._check_and_set_sensitivity_elements(doubleml_dict) # check if all sizes match - _check_framework_shapes(self) + self._check_framework_shapes() # initialize bootstrap distribution self._boot_t_stat = None self._boot_method = None @@ -197,8 +197,8 @@ def __add__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - _check_framework_shapes(self) - _check_framework_shapes(other) + self._check_framework_shapes() + other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) all_thetas = self._all_thetas + other._all_thetas @@ -257,8 +257,8 @@ def __sub__(self, other): if isinstance(other, DoubleMLFramework): # internal consistency check - _check_framework_shapes(self) - _check_framework_shapes(other) + self._check_framework_shapes() + other._check_framework_shapes() _check_framework_compatibility(self, other, check_treatments=True) all_thetas = self._all_thetas - other._all_thetas @@ -383,7 +383,6 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. 
" 'Most likely this is due to low quality learners (especially propensity scores).') - def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, "null_hypothesis") _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1) @@ -430,6 +429,7 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h """ # compute sensitivity analysis sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) + _ = self._calc_robustness_value(null_hypothesis=null_hypothesis, level=level, rho=rho, idx_treatment=0) def confint(self, joint=False, level=0.95): """ @@ -592,6 +592,71 @@ def p_adjust(self, method='romano-wolf'): return df_p_vals, all_p_vals_corrected + def _check_and_set_sensitivity_elements(self, doubleml_dict): + if not ("sensitivity_elements" in doubleml_dict.keys()): + sensitivity_implemented = False + sensitivity_elements = None + + else: + if not isinstance(doubleml_dict['sensitivity_elements'], dict): + raise TypeError('sensitivity_elements must be a dictionary.') + expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] + if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): + raise ValueError('The sensitivity_elements dict must contain the following ' + 'keys: ' + ', '.join(expected_keys_sensitivity)) + + for key in expected_keys_sensitivity: + if not isinstance(doubleml_dict['sensitivity_elements'][key], np.ndarray): + raise TypeError(f'The sensitivity element {key} must be a numpy array.') + + # set sensitivity elements + sensitivity_implemented = True + sensitivity_elements = { + 'sigma2': doubleml_dict['sensitivity_elements']['sigma2'], + 'nu2': doubleml_dict['sensitivity_elements']['nu2'], + 'psi_sigma2': doubleml_dict['sensitivity_elements']['psi_sigma2'], + 'psi_nu2': doubleml_dict['sensitivity_elements']['psi_nu2'], + 'riesz_rep': doubleml_dict['sensitivity_elements']['riesz_rep'], + } + + return sensitivity_implemented, sensitivity_elements + + def _check_framework_shapes(self): + score_dim = (self._n_obs, self._n_thetas, self.n_rep) + # check if all sizes match + if self._thetas.shape != (self._n_thetas,): + raise ValueError(f'The shape of thetas does not match the expected shape ({self._n_thetas},).') + if self._ses.shape != (self._n_thetas,): + raise ValueError(f'The shape of ses does not match the expected shape ({self._n_thetas},).') + if self._all_thetas.shape != (self._n_thetas, self._n_rep): + raise ValueError(f'The shape of all_thetas does not match the expected shape ({self._n_thetas}, {self._n_rep}).') + if self._all_ses.shape != (self._n_thetas, self._n_rep): + raise ValueError(f'The shape of all_ses does not match the expected shape ({self._n_thetas}, {self._n_rep}).') + if self._var_scaling_factors.shape != (self._n_thetas,): + raise ValueError(f'The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).') + # dimension of scaled_psi is n_obs x n_thetas x n_rep (per default) + if self._scaled_psi.shape != score_dim: + raise ValueError(('The shape of scaled_psi does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + + if self._sensitivity_implemented: + if self._sensitivity_elements['sigma2'].shape != (1, self._n_thetas, self.n_rep): + raise ValueError('The shape of sigma2 does not match the expected shape ' + f'(1, {self._n_thetas}, {self._n_rep}).') + if 
self._sensitivity_elements['nu2'].shape != (1, self._n_thetas, self.n_rep): + raise ValueError(f'The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).') + if self._sensitivity_elements['psi_sigma2'].shape != score_dim: + raise ValueError(('The shape of psi_sigma2 does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + if self._sensitivity_elements['psi_nu2'].shape != score_dim: + raise ValueError(('The shape of psi_nu2 does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + if self._sensitivity_elements['riesz_rep'].shape != score_dim: + raise ValueError(('The shape of riesz_rep does not match the expected ' + f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) + + return None + def concat(objs): """ @@ -604,7 +669,7 @@ def concat(objs): raise TypeError('All objects must be of type DoubleMLFramework.') # check on internal consitency of objects - _ = [_check_framework_shapes(obj) for obj in objs] + _ = [obj._check_framework_shapes() for obj in objs] # check if all objects are compatible in n_obs and n_rep _ = [_check_framework_compatibility(objs[0], obj, check_treatments=False) for obj in objs[1:]] @@ -638,72 +703,6 @@ def concat(objs): new_obj = DoubleMLFramework(doubleml_dict) # check internal consistency of new object - _check_framework_shapes(new_obj) + new_obj._check_framework_shapes() return new_obj - - -def _check_and_set_sensitivity_elements(self, doubleml_dict): - if not ("senstivity_elements" in doubleml_dict.keys()): - sensitivity_implemented = False - sensitivity_elements = None - - else: - if not isinstance(doubleml_dict['sensitivity_elements'], dict): - raise TypeError('sensitivity_elements must be a dictionary.') - expected_keys_sensitivity = ['sigma2', 'nu2', 'psi_sigma2', 'psi_nu2', 'riesz_rep'] - if not all(key in doubleml_dict['sensitivity_elements'].keys() for key in expected_keys_sensitivity): - raise ValueError('The sensitivity_elements dict must contain the following ' - 'keys: ' + ', '.join(expected_keys_sensitivity)) - - for key in expected_keys_sensitivity: - if not isinstance(doubleml_dict['sensitivity_elements'][key], np.ndarray): - raise TypeError(f'The sensitivity element {key} must be a numpy array.') - - # set sensitivity elements - sensitivity_implemented = True - sensitivity_elements = { - 'sigma2': doubleml_dict['sensitivity_elements']['sigma2'], - 'nu2': doubleml_dict['sensitivity_elements']['nu2'], - 'psi_sigma2': doubleml_dict['sensitivity_elements']['psi_sigma2'], - 'psi_nu2': doubleml_dict['sensitivity_elements']['psi_nu2'], - 'riesz_rep': doubleml_dict['sensitivity_elements']['riesz_rep'], - } - - return sensitivity_implemented, sensitivity_elements - - -def _check_framework_shapes(self): - score_dim = (self._n_obs, self._n_thetas, self.n_rep) - # check if all sizes match - if self._thetas.shape != (self._n_thetas,): - raise ValueError(f'The shape of thetas does not match the expected shape ({self._n_thetas},).') - if self._ses.shape != (self._n_thetas,): - raise ValueError(f'The shape of ses does not match the expected shape ({self._n_thetas},).') - if self._all_thetas.shape != (self._n_thetas, self._n_rep): - raise ValueError(f'The shape of all_thetas does not match the expected shape ({self._n_thetas}, {self._n_rep}).') - if self._all_ses.shape != (self._n_thetas, self._n_rep): - raise ValueError(f'The shape of all_ses does not match the expected shape ({self._n_thetas}, {self._n_rep}).') - if self._var_scaling_factors.shape != 
(self._n_thetas,): - raise ValueError(f'The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).') - # dimension of scaled_psi is n_obs x n_thetas x n_rep (per default) - if self._scaled_psi.shape != score_dim: - raise ValueError(('The shape of scaled_psi does not match the expected ' - f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) - - if self._sensitivity_implemented: - if self._sensitivity_elements['sigma2'].shape != (1, self._n_thetas, self.n_rep): - raise ValueError(f'The shape of sigma2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).') - if self._sensitivity_elements['nu2'].shape != (1, self._n_thetas, self.n_rep): - raise ValueError(f'The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).') - if self._sensitivity_elements['psi_sigma2'].shape != score_dim: - raise ValueError(('The shape of psi_sigma2 does not match the expected ' - f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) - if self._sensitivity_elements['psi_nu2'].shape != score_dim: - raise ValueError(('The shape of psi_nu2 does not match the expected ' - f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) - if self._sensitivity_elements['riesz_rep'].shape != score_dim: - raise ValueError(('The shape of riesz_rep does not match the expected ' - f'shape ({self._n_obs}, {self._n_thetas}, {self._n_rep}).')) - - return None diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 547dcb8c6..bc5e22339 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -22,7 +22,6 @@ 'riesz_rep': np.ones(shape=(n_obs, n_thetas, n_rep)) } - # combine objects and estimate parameters dml_framework_obj_1 = DoubleMLFramework(doubleml_dict) @@ -299,12 +298,6 @@ def test_sensitivity_exceptions(): _ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=0.0, null_hypothesis=0.0, idx_treatment=0) # test null_hypothesis - msg = "null_hypothesis has to be of type float or np.ndarry. 1 of type was passed." - with pytest.raises(TypeError, match=msg): - _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=1) - msg = r"null_hypothesis is numpy.ndarray but does not have the required shape \(1,\). Array of shape \(2,\) was passed." - with pytest.raises(ValueError, match=msg): - _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=np.array([1, 2])) msg = "null_hypothesis must be of float type. 1 of type was passed." 
with pytest.raises(TypeError, match=msg): _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=1, level=0.95, rho=1.0, idx_treatment=0) @@ -312,11 +305,24 @@ def test_sensitivity_exceptions(): with pytest.raises(TypeError, match=msg): _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0) + sensitivity_dict = generate_dml_dict(psi_a, psi_b) + sensitivity_dict['sensitivity_elements'] = { + 'sigma2': np.ones(shape=(1, n_thetas, n_rep)), + 'nu2': -1.0 * np.ones(shape=(1, n_thetas, n_rep)), + 'psi_sigma2': np.ones(shape=(n_obs, n_thetas, n_rep)), + 'psi_nu2': np.ones(shape=(n_obs, n_thetas, n_rep)), + 'riesz_rep': np.ones(shape=(n_obs, n_thetas, n_rep)) + } + dml_framework_sensitivity = DoubleMLFramework(sensitivity_dict) + # test variances - sensitivity_elements = dict({'sigma2': 1.0, 'nu2': -2.4, 'psi_sigma2': 1.0, 'psi_nu2': 1.0, 'riesz_rep': 1.0}) - _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) - msg = ('sensitivity_elements sigma2 and nu2 have to be positive. ' - r'Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. ' - r'Most likely this is due to low quality learners \(especially propensity scores\).') + msg = ( + r'sensitivity_elements sigma2 and nu2 have to be positive\. ' + r'Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] ' + r'and nu2 \[\[\[-1\. -1\. -1\. -1\. -1\.\]\n\s+\[-1\. -1\. -1\. -1\. -1\.\]\]\]\. ' + r'Most likely this is due to low quality learners \(especially propensity scores\)\.' + ) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_sensitivity._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95) with pytest.raises(ValueError, match=msg): - dml_irm.sensitivity_analysis() \ No newline at end of file + dml_framework_sensitivity.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.95) From ceb911785c933292f0384c25e347ad40729a0f51 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:47:55 +0200 Subject: [PATCH 17/38] further update framework --- doubleml/double_ml_framework.py | 9 +++++++++ doubleml/tests/test_framework.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 439aa8c74..55e9b64b7 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -383,6 +383,15 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. 
" 'Most likely this is due to low quality learners (especially propensity scores).') + # elementwise operations + confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0-cf_d)))) + sensitivity_scaling = np.sqrt(np.multiply(sigma2, nu2)) + + # sigma2 and nu2 are of shape (1, n_thetas, n_rep), whereas the all_thetas is of shape (n_thetas, n_rep) + all_theta_lower = self.all_coef - np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) + all_theta_upper = self.all_coef + np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) + + def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, "null_hypothesis") _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1) diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 500965a86..9bfc2b95b 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -170,7 +170,7 @@ def dml_framework_from_doubleml_fixture(n_rep): ci = dml_framework_obj.confint(joint=False, level=0.95) dml_framework_obj.bootstrap(method='normal') ci_joint = dml_framework_obj.confint(joint=True, level=0.95) - dml_framework_obj._calc_sensitivity_analysis() + dml_framework_obj.sensitivity_analysis() # add objects dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj @@ -326,3 +326,4 @@ def test_dml_framework_sensitivity(dml_framework_from_doubleml_fixture): for key in score_keys: assert dml_framework_from_doubleml_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ (n_obs, 2, n_rep) + From 86e2ef029e28d577f410e910b3544cd5ca3c6020 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:25:17 +0200 Subject: [PATCH 18/38] move sensitivity test to extra file --- doubleml/tests/test_framework.py | 33 -------- doubleml/tests/test_framework_sensitivity.py | 85 ++++++++++++++++++++ 2 files changed, 85 insertions(+), 33 deletions(-) create mode 100644 doubleml/tests/test_framework_sensitivity.py diff --git a/doubleml/tests/test_framework.py b/doubleml/tests/test_framework.py index 9bfc2b95b..0a447420e 100644 --- a/doubleml/tests/test_framework.py +++ b/doubleml/tests/test_framework.py @@ -170,7 +170,6 @@ def dml_framework_from_doubleml_fixture(n_rep): ci = dml_framework_obj.confint(joint=False, level=0.95) dml_framework_obj.bootstrap(method='normal') ci_joint = dml_framework_obj.confint(joint=True, level=0.95) - dml_framework_obj.sensitivity_analysis() # add objects dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj @@ -295,35 +294,3 @@ def test_dml_framework_from_doubleml_ci(dml_framework_from_doubleml_fixture): assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_mul_obj'], pd.DataFrame) assert isinstance(dml_framework_from_doubleml_fixture['ci_concat'], pd.DataFrame) assert isinstance(dml_framework_from_doubleml_fixture['ci_joint_concat'], pd.DataFrame) - - -@pytest.mark.ci -def test_dml_framework_sensitivity(dml_framework_from_doubleml_fixture): - n_rep = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_rep - n_thetas = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_thetas - n_obs = dml_framework_from_doubleml_fixture['dml_framework_obj'].n_obs - - object_list = ['dml_framework_obj', - 'dml_framework_obj_add_obj', - 'dml_framework_obj_sub_obj', - 'dml_framework_obj_mul_obj'] - var_keys = ['sigma2', 'nu2'] - score_keys = ['psi_sigma2', 
'psi_nu2', 'riesz_rep'] - - for obj in object_list: - assert dml_framework_from_doubleml_fixture[obj]._sensitivity_implemented - for key in var_keys: - assert dml_framework_from_doubleml_fixture[obj]._sensitivity_elements[key].shape == \ - (1, n_thetas, n_rep) - for key in score_keys: - assert dml_framework_from_doubleml_fixture[obj]._sensitivity_elements[key].shape == \ - (n_obs, n_thetas, n_rep) - - # separate test for concat - for key in var_keys: - assert dml_framework_from_doubleml_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ - (1, 2, n_rep) - for key in score_keys: - assert dml_framework_from_doubleml_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ - (n_obs, 2, n_rep) - diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py new file mode 100644 index 000000000..adaccb75f --- /dev/null +++ b/doubleml/tests/test_framework_sensitivity.py @@ -0,0 +1,85 @@ +import pytest + +from doubleml.datasets import make_irm_data +from doubleml.irm.irm import DoubleMLIRM +from doubleml.double_ml_framework import concat + +from sklearn.linear_model import LinearRegression, LogisticRegression + + +@pytest.fixture(scope='module', + params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope='module') +def dml_framework_sensitivity_fixture(n_rep): + dml_data = make_irm_data() + + ml_g = LinearRegression() + ml_m = LogisticRegression() + + dml_irm_obj = DoubleMLIRM(dml_data, ml_g, ml_m, n_rep=n_rep) + dml_irm_obj.fit() + dml_framework_obj = dml_irm_obj.construct_framework() + dml_framework_obj.sensitivity_analysis() + + # add objects + dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj + + # substract objects + dml_data_2 = make_irm_data() + dml_irm_obj_2 = DoubleMLIRM(dml_data_2, ml_g, ml_m, n_rep=n_rep) + dml_irm_obj_2.fit() + dml_framework_obj_2 = dml_irm_obj_2.construct_framework() + dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2 + + # multiply objects + dml_framework_obj_mul_obj = dml_framework_obj * 2 + + # concat objects + dml_framework_obj_concat = concat([dml_framework_obj, dml_framework_obj]) + + result_dict = { + 'dml_obj': dml_irm_obj, + 'dml_obj_2': dml_irm_obj_2, + 'dml_framework_obj': dml_framework_obj, + 'dml_framework_obj_add_obj': dml_framework_obj_add_obj, + 'dml_framework_obj_sub_obj': dml_framework_obj_sub_obj, + 'dml_framework_obj_mul_obj': dml_framework_obj_mul_obj, + 'dml_framework_obj_concat': dml_framework_obj_concat, + 'n_rep': n_rep, + } + return result_dict + + +@pytest.mark.ci +def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture): + n_rep = dml_framework_sensitivity_fixture['dml_framework_obj'].n_rep + n_thetas = dml_framework_sensitivity_fixture['dml_framework_obj'].n_thetas + n_obs = dml_framework_sensitivity_fixture['dml_framework_obj'].n_obs + + object_list = ['dml_framework_obj', + 'dml_framework_obj_add_obj', + 'dml_framework_obj_sub_obj', + 'dml_framework_obj_mul_obj'] + var_keys = ['sigma2', 'nu2'] + score_keys = ['psi_sigma2', 'psi_nu2', 'riesz_rep'] + + for obj in object_list: + assert dml_framework_sensitivity_fixture[obj]._sensitivity_implemented + for key in var_keys: + assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == \ + (1, n_thetas, n_rep) + for key in score_keys: + assert dml_framework_sensitivity_fixture[obj]._sensitivity_elements[key].shape == \ + (n_obs, n_thetas, n_rep) + + # separate test for concat + for key in var_keys: + assert 
dml_framework_sensitivity_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ + (1, 2, n_rep) + for key in score_keys: + assert dml_framework_sensitivity_fixture['dml_framework_obj_concat']._sensitivity_elements[key].shape == \ + (n_obs, 2, n_rep) From 1547913c11cff992a1d67d8dae2cdd2142ba38e2 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:25:41 +0200 Subject: [PATCH 19/38] raise error for concat clustering --- doubleml/double_ml_framework.py | 52 +++++++++++++++++++-- doubleml/tests/test_framework_exceptions.py | 7 +++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 55e9b64b7..102ce6991 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -4,7 +4,7 @@ from scipy.stats import norm from statsmodels.stats.multitest import multipletests -from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses +from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \ _check_float, _check_integer @@ -388,9 +388,49 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): sensitivity_scaling = np.sqrt(np.multiply(sigma2, nu2)) # sigma2 and nu2 are of shape (1, n_thetas, n_rep), whereas the all_thetas is of shape (n_thetas, n_rep) - all_theta_lower = self.all_coef - np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) - all_theta_upper = self.all_coef + np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) + all_theta_lower = self.all_thetas - np.multiply(np.squeeze(sensitivity_scaling, axis=0), confounding_strength) + all_theta_upper = self.all_thetas + np.multiply(np.squeeze(sensitivity_scaling, axis=0), confounding_strength) + psi_variances = np.multiply(sigma2, psi_nu) + np.multiply(nu2, psi_sigma) + psi_bias = np.multiply(np.divide(confounding_strength, np.multiply(2.0, sensitivity_scaling)), psi_variances) + psi_lower = psi_scaled - psi_bias + psi_upper = psi_scaled + psi_bias + + # shape (n_thetas, n_reps); includes scaling with n^{-1/2} + all_sigma_lower = np.full_like(all_theta_lower, fill_value=np.nan) + all_sigma_upper = np.full_like(all_theta_upper, fill_value=np.nan) + + for i_rep in range(self.n_rep): + for i_theta in range(self.n_thetas): + + if not self._is_cluster_data: + cluster_vars = None + smpls_cluster = None + n_folds_per_cluster = None + else: + cluster_vars = self._dml_data.cluster_vars + smpls_cluster = self._smpls_cluster[i_rep] + n_folds_per_cluster = self._n_folds_per_cluster + + sigma2_lower_hat, _ = _var_est(psi=psi_lower[:, i_theta, i_rep], + psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]), + smpls=self._smpls[i_rep], + is_cluster_data=self._is_cluster_data, + cluster_vars=cluster_vars, + smpls_cluster=smpls_cluster, + n_folds_per_cluster=n_folds_per_cluster) + sigma2_upper_hat, _ = _var_est(psi=psi_upper[:, i_theta, i_rep], + psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]), + smpls=self._smpls[i_rep], + is_cluster_data=self._is_cluster_data, + cluster_vars=cluster_vars, + smpls_cluster=smpls_cluster, + n_folds_per_cluster=n_folds_per_cluster) + + all_sigma_lower[i_theta, i_rep] = np.sqrt(sigma2_lower_hat) + all_sigma_upper[i_theta, i_rep] = np.sqrt(sigma2_upper_hat) + + return def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, 
"null_hypothesis") @@ -690,7 +730,11 @@ def concat(objs): thetas = np.concatenate([obj.thetas for obj in objs], axis=0) ses = np.concatenate([obj.ses for obj in objs], axis=0) - is_cluster_data = any(obj._is_cluster_data for obj in objs) + if any(obj._is_cluster_data for obj in objs): + raise NotImplementedError('concat not yet implemented with clustering.') + else: + is_cluster_data = False + doubleml_dict = { 'thetas': thetas, 'ses': ses, diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index bc5e22339..6dc43ccaf 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -202,6 +202,13 @@ def test_operation_exceptions(): dml_framework_obj_2 = DoubleMLFramework(doubleml_dict_2) _ = concat([dml_framework_obj_1, dml_framework_obj_2]) + msg = 'concat not yet implemented with clustering.' + with pytest.raises(NotImplementedError, match=msg): + doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2) + doubleml_dict_cluster['is_cluster_data'] = True + dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster) + _ = concat([dml_framework_obj_2, dml_framework_obj_cluster]) + @pytest.mark.ci def test_p_adjust_exceptions(): From 9e6344072fdb3051be6a050b7a61a60043fd9755 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:39:36 +0200 Subject: [PATCH 20/38] add input check for cluster data --- doubleml/double_ml_framework.py | 9 ++++++--- doubleml/tests/test_framework_exceptions.py | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 102ce6991..93fc26a31 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -6,7 +6,7 @@ from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \ - _check_float, _check_integer + _check_float, _check_integer, _check_bool class DoubleMLFramework(): @@ -47,6 +47,7 @@ def __init__( self._scaled_psi = doubleml_dict['scaled_psi'] if "is_cluster_data" in doubleml_dict.keys(): + _check_bool(doubleml_dict['is_cluster_data'], 'is_cluster_data') self._is_cluster_data = doubleml_dict['is_cluster_data'] # initialize sensitivity analysis @@ -404,24 +405,26 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): for i_theta in range(self.n_thetas): if not self._is_cluster_data: + smpls = None cluster_vars = None smpls_cluster = None n_folds_per_cluster = None else: + smpls = self._smpls[i_rep] cluster_vars = self._dml_data.cluster_vars smpls_cluster = self._smpls_cluster[i_rep] n_folds_per_cluster = self._n_folds_per_cluster sigma2_lower_hat, _ = _var_est(psi=psi_lower[:, i_theta, i_rep], psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]), - smpls=self._smpls[i_rep], + smpls=smpls, is_cluster_data=self._is_cluster_data, cluster_vars=cluster_vars, smpls_cluster=smpls_cluster, n_folds_per_cluster=n_folds_per_cluster) sigma2_upper_hat, _ = _var_est(psi=psi_upper[:, i_theta, i_rep], psi_deriv=np.ones_like(psi_upper[:, i_theta, i_rep]), - smpls=self._smpls[i_rep], + smpls=smpls, is_cluster_data=self._is_cluster_data, cluster_vars=cluster_vars, smpls_cluster=smpls_cluster, diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 6dc43ccaf..f39ae2b2c 100644 --- 
a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -115,6 +115,12 @@ def test_input_exceptions(): test_dict['sensitivity_elements']['riesz_rep'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) DoubleMLFramework(test_dict) + msg = "s_cluster_data has to be boolean. 1.0 of type was passed." + with pytest.raises(TypeError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['is_cluster_data'] = 1.0 + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition From c36dcf0f186ed0455b56e9472ee6387b617e9b78 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:51:40 +0200 Subject: [PATCH 21/38] further input check for cluster data --- doubleml/double_ml_framework.py | 16 +++++++++++++--- doubleml/tests/test_framework_exceptions.py | 8 +++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 93fc26a31..69671bf5a 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -46,9 +46,8 @@ def __init__( self._var_scaling_factors = doubleml_dict['var_scaling_factors'] self._scaled_psi = doubleml_dict['scaled_psi'] - if "is_cluster_data" in doubleml_dict.keys(): - _check_bool(doubleml_dict['is_cluster_data'], 'is_cluster_data') - self._is_cluster_data = doubleml_dict['is_cluster_data'] + # initialize cluster data + self._check_and_set_cluster_data(doubleml_dict) # initialize sensitivity analysis self._sensitivity_implemented, self._sensitivity_elements = self._check_and_set_sensitivity_elements(doubleml_dict) @@ -644,6 +643,17 @@ def p_adjust(self, method='romano-wolf'): return df_p_vals, all_p_vals_corrected + def _check_and_set_cluster_data(self, doubleml_dict): + if "is_cluster_data" in doubleml_dict.keys(): + _check_bool(doubleml_dict['is_cluster_data'], 'is_cluster_data') + self._is_cluster_data = doubleml_dict['is_cluster_data'] + + if self._is_cluster_data: + if not ("cluster_dict" in doubleml_dict.keys()): + raise ValueError('If is_cluster_data is True, cluster_dict must be provided.') + + return + def _check_and_set_sensitivity_elements(self, doubleml_dict): if not ("sensitivity_elements" in doubleml_dict.keys()): sensitivity_implemented = False diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index f39ae2b2c..69520594b 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -115,12 +115,18 @@ def test_input_exceptions(): test_dict['sensitivity_elements']['riesz_rep'] = np.ones(shape=(n_obs, n_thetas, n_rep, 3)) DoubleMLFramework(test_dict) - msg = "s_cluster_data has to be boolean. 1.0 of type was passed." + msg = "is_cluster_data has to be boolean. 1.0 of type was passed." with pytest.raises(TypeError, match=msg): test_dict = copy.deepcopy(doubleml_dict) test_dict['is_cluster_data'] = 1.0 DoubleMLFramework(test_dict) + msg = "If is_cluster_data is True, cluster_dict must be provided." 
+ with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['is_cluster_data'] = True + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition From 47d9b76ffca607964e7f7fdfd02f913b1a2c076c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:58:10 +0200 Subject: [PATCH 22/38] update set sensitivity elements --- doubleml/double_ml_framework.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 69671bf5a..6e11920b7 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -50,7 +50,7 @@ def __init__( self._check_and_set_cluster_data(doubleml_dict) # initialize sensitivity analysis - self._sensitivity_implemented, self._sensitivity_elements = self._check_and_set_sensitivity_elements(doubleml_dict) + self._check_and_set_sensitivity_elements(doubleml_dict) # check if all sizes match self._check_framework_shapes() @@ -681,7 +681,10 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict): 'riesz_rep': doubleml_dict['sensitivity_elements']['riesz_rep'], } - return sensitivity_implemented, sensitivity_elements + self._sensitivity_implemented = sensitivity_implemented + self._sensitivity_elements = sensitivity_elements + + return def _check_framework_shapes(self): score_dim = (self._n_obs, self._n_thetas, self.n_rep) From ea7773a379e4dab7a2baa91e7ae707dc255b7b02 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 08:36:26 +0200 Subject: [PATCH 23/38] add cluster dict to doubleml class --- doubleml/double_ml.py | 11 +++++++++++ doubleml/double_ml_framework.py | 10 ++++++++++ doubleml/tests/test_framework_exceptions.py | 15 +++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index f1f383b1b..be7bcc464 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -552,6 +552,17 @@ def construct_framework(self): } }) + if self._is_cluster_data: + doubleml_dict.update({ + 'is_cluster_data': True, + 'cluster_dict': { + 'smpls': self._smpls, + 'smpls_cluster': self._smpls_cluster, + 'cluster_vars': self._dml_data.cluster_vars, + 'n_folds_per_cluster': self._n_folds_per_cluster, + } + }) + doubleml_framework = DoubleMLFramework(doubleml_dict) return doubleml_framework diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 6e11920b7..c82797a4d 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -652,6 +652,16 @@ def _check_and_set_cluster_data(self, doubleml_dict): if not ("cluster_dict" in doubleml_dict.keys()): raise ValueError('If is_cluster_data is True, cluster_dict must be provided.') + if not isinstance(doubleml_dict['cluster_dict'], dict): + raise TypeError('cluster_dict must be a dictionary.') + + expected_keys_cluster = ['smpls', 'smpls_cluster', 'cluster_vars', 'n_folds_per_cluster'] + if not all(key in doubleml_dict['cluster_dict'].keys() for key in expected_keys_cluster): + raise ValueError('The cluster_dict must contain the following keys: ' + ', '.join(expected_keys_cluster) + + '. 
Got: ' + ', '.join(doubleml_dict['cluster_dict'].keys()) + '.') + + self._cluster_dict = doubleml_dict['cluster_dict'] + return def _check_and_set_sensitivity_elements(self, doubleml_dict): diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 69520594b..aa94b7bea 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -127,6 +127,21 @@ def test_input_exceptions(): test_dict['is_cluster_data'] = True DoubleMLFramework(test_dict) + msg = "cluster_dict must be a dictionary." + with pytest.raises(TypeError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['is_cluster_data'] = True + test_dict['cluster_dict'] = 1.0 + DoubleMLFramework(test_dict) + + msg = ('The cluster_dict must contain the following keys: smpls, smpls_cluster,' + ' cluster_vars, n_folds_per_cluster. Got: cluster_ids.') + with pytest.raises(ValueError, match=msg): + test_dict = copy.deepcopy(doubleml_dict) + test_dict['is_cluster_data'] = True + test_dict['cluster_dict'] = {'cluster_ids': np.ones(shape=(n_obs, n_rep))} + DoubleMLFramework(test_dict) + def test_operation_exceptions(): # addition From de4d9648279ffa0870cfdd3944aafb312b781023 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 08:37:37 +0200 Subject: [PATCH 24/38] fix exception test --- doubleml/tests/test_framework_exceptions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index aa94b7bea..8e814e177 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -233,6 +233,10 @@ def test_operation_exceptions(): with pytest.raises(NotImplementedError, match=msg): doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2) doubleml_dict_cluster['is_cluster_data'] = True + doubleml_dict_cluster['cluster_dict'] = {'smpls': np.ones(shape=(n_obs, n_rep)), + 'smpls_cluster': np.ones(shape=(n_obs, n_rep)), + 'cluster_vars': np.ones(shape=(n_obs, n_rep)), + 'n_folds_per_cluster': 2} dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster) _ = concat([dml_framework_obj_2, dml_framework_obj_cluster]) From b738cda1ac992480699e75d846ed51b72453359b Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 08:44:57 +0200 Subject: [PATCH 25/38] formatting --- doubleml/tests/test_framework_exceptions.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 8e814e177..e922df7b0 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -233,10 +233,12 @@ def test_operation_exceptions(): with pytest.raises(NotImplementedError, match=msg): doubleml_dict_cluster = generate_dml_dict(psi_a_2, psi_b_2) doubleml_dict_cluster['is_cluster_data'] = True - doubleml_dict_cluster['cluster_dict'] = {'smpls': np.ones(shape=(n_obs, n_rep)), - 'smpls_cluster': np.ones(shape=(n_obs, n_rep)), - 'cluster_vars': np.ones(shape=(n_obs, n_rep)), - 'n_folds_per_cluster': 2} + doubleml_dict_cluster['cluster_dict'] = { + 'smpls': np.ones(shape=(n_obs, n_rep)), + 'smpls_cluster': np.ones(shape=(n_obs, n_rep)), + 'cluster_vars': np.ones(shape=(n_obs, n_rep)), + 'n_folds_per_cluster': 2 + } dml_framework_obj_cluster = 
DoubleMLFramework(doubleml_dict_cluster) _ = concat([dml_framework_obj_2, dml_framework_obj_cluster]) From 2a337174bbd05d83ecc7e92f614a2d16288adca7 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:04:16 +0200 Subject: [PATCH 26/38] add cluster compatiblitly for operations --- doubleml/double_ml_framework.py | 21 ++++++++++----------- doubleml/tests/test_framework_exceptions.py | 7 ++++++- doubleml/utils/_checks.py | 4 ++++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index c82797a4d..c54ed4ab4 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -215,8 +215,6 @@ def __add__(self, other): all_ses = np.sqrt(sigma2_hat) thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors) - is_cluster_data = self._is_cluster_data or other._is_cluster_data - doubleml_dict = { 'thetas': thetas, 'ses': ses, @@ -224,7 +222,8 @@ def __add__(self, other): 'all_ses': all_ses, 'var_scaling_factors': var_scaling_factors, 'scaled_psi': scaled_psi, - 'is_cluster_data': is_cluster_data, + 'is_cluster_data': self._is_cluster_data, + 'cluster_dict': self._cluster_dict, } # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM) @@ -275,7 +274,6 @@ def __sub__(self, other): all_ses = np.sqrt(sigma2_hat) thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors) - is_cluster_data = self._is_cluster_data or other._is_cluster_data doubleml_dict = { 'thetas': thetas, 'ses': ses, @@ -283,7 +281,8 @@ def __sub__(self, other): 'all_ses': all_ses, 'var_scaling_factors': var_scaling_factors, 'scaled_psi': scaled_psi, - 'is_cluster_data': is_cluster_data, + 'is_cluster_data': self._is_cluster_data, + 'cluster_dict': self._cluster_dict, } # sensitivity combination only available for same outcome and cond. expectation (e.g. 
IRM) @@ -323,7 +322,6 @@ def __mul__(self, other): all_ses = np.multiply(other, self._all_ses) scaled_psi = np.multiply(other, self._scaled_psi) - is_cluster_data = self._is_cluster_data doubleml_dict = { 'thetas': thetas, 'ses': ses, @@ -331,7 +329,8 @@ def __mul__(self, other): 'all_ses': all_ses, 'var_scaling_factors': var_scaling_factors, 'scaled_psi': scaled_psi, - 'is_cluster_data': is_cluster_data, + 'is_cluster_data': self._is_cluster_data, + 'cluster_dict': self._cluster_dict, } # sensitivity combination only available for linear models @@ -409,10 +408,10 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): smpls_cluster = None n_folds_per_cluster = None else: - smpls = self._smpls[i_rep] - cluster_vars = self._dml_data.cluster_vars - smpls_cluster = self._smpls_cluster[i_rep] - n_folds_per_cluster = self._n_folds_per_cluster + smpls = self._cluster_dict['smpls'][i_rep] + cluster_vars = self._cluster_dict['cluster_vars'] + smpls_cluster = self._cluster_dict['smpls_cluster'][i_rep] + n_folds_per_cluster = self._cluster_dict['n_folds_per_cluster'] sigma2_lower_hat, _ = _var_est(psi=psi_lower[:, i_theta, i_rep], psi_deriv=np.ones_like(psi_lower[:, i_theta, i_rep]), diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index e922df7b0..43e906fff 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -240,7 +240,12 @@ def test_operation_exceptions(): 'n_folds_per_cluster': 2 } dml_framework_obj_cluster = DoubleMLFramework(doubleml_dict_cluster) - _ = concat([dml_framework_obj_2, dml_framework_obj_cluster]) + _ = concat([dml_framework_obj_cluster, dml_framework_obj_cluster]) + + # cluster compatibility + msg = "The cluster structure in DoubleMLFrameworks must be the same. Got False and True." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_2 + dml_framework_obj_cluster @pytest.mark.ci diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py index aa736a0af..a99549037 100644 --- a/doubleml/utils/_checks.py +++ b/doubleml/utils/_checks.py @@ -348,6 +348,10 @@ def _check_framework_compatibility(dml_framework_1, dml_framework_2, check_treat raise ValueError('The number of parameters theta in DoubleMLFrameworks must be the same. ' f'Got {str(dml_framework_1.n_thetas)} and {str(dml_framework_2.n_thetas)}.') + if dml_framework_1._is_cluster_data != dml_framework_2._is_cluster_data: + raise ValueError('The cluster structure in DoubleMLFrameworks must be the same. 
' + f'Got {str(dml_framework_1._is_cluster_data)} and {str(dml_framework_2._is_cluster_data)}.') + def _check_set(x): return {x} if x is not None else {} From b88727269794f6aafdd7f594c3a8e93ceffb9e63 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:19:58 +0200 Subject: [PATCH 27/38] Update double_ml_framework.py --- doubleml/double_ml_framework.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index c54ed4ab4..b779a4237 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -643,6 +643,8 @@ def p_adjust(self, method='romano-wolf'): return df_p_vals, all_p_vals_corrected def _check_and_set_cluster_data(self, doubleml_dict): + self._cluster_dict = None + if "is_cluster_data" in doubleml_dict.keys(): _check_bool(doubleml_dict['is_cluster_data'], 'is_cluster_data') self._is_cluster_data = doubleml_dict['is_cluster_data'] From a9529f0d7b4263977de62c6353042ac5b05a92ae Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 09:37:24 +0200 Subject: [PATCH 28/38] add simple dataset --- doubleml/double_ml_framework.py | 27 +++++++++++++++++++- doubleml/tests/conftest.py | 25 ++++++++++++++++++ doubleml/tests/test_framework_sensitivity.py | 6 ++--- 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index b779a4237..5dde4dedc 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -431,7 +431,32 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): all_sigma_lower[i_theta, i_rep] = np.sqrt(sigma2_lower_hat) all_sigma_upper[i_theta, i_rep] = np.sqrt(sigma2_upper_hat) - return + # aggregate coefs and ses over n_rep + theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, self._var_scaling_factors) + theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, self._var_scaling_factors) + + # per repetition confidence intervals + quant = norm.ppf(level) + all_ci_lower = all_theta_lower - np.multiply(quant, all_sigma_lower) + all_ci_upper = all_theta_upper + np.multiply(quant, all_sigma_upper) + + ci_lower = np.median(all_ci_lower, axis=1) + ci_upper = np.median(all_ci_upper, axis=1) + + theta_dict = {'lower': theta_lower, + 'upper': theta_upper} + + se_dict = {'lower': sigma_lower, + 'upper': sigma_upper} + + ci_dict = {'lower': ci_lower, + 'upper': ci_upper} + + res_dict = {'theta': theta_dict, + 'se': se_dict, + 'ci': ci_dict} + + return res_dict def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, "null_hypothesis") diff --git a/doubleml/tests/conftest.py b/doubleml/tests/conftest.py index 27666a72f..328a1211f 100644 --- a/doubleml/tests/conftest.py +++ b/doubleml/tests/conftest.py @@ -9,6 +9,8 @@ from doubleml.datasets import make_plr_turrell2018, make_irm_data, \ make_pliv_CHS2015 +from doubleml import DoubleMLData + def _g(x): return np.power(np.sin(x), 2) @@ -22,6 +24,29 @@ def _m2(x): return np.power(x, 2) +@pytest.fixture(scope='session', + params=[(500, 5)]) +def generate_data_simple(request): + n_p = request.param + np.random.seed(1111) + # setting parameters + n = n_p[0] + p = n_p[1] + theta = 1.0 + + # generating data + D1 = 1.0 * (np.random.uniform(size=n) > 0.5) + D2 = 1.0 * (np.random.uniform(size=n) > 0.5) + X = 
np.random.normal(size=(n, p)) + Y = theta * D1 + np.dot(X, np.ones(p)) + np.random.normal(size=n) + df = pd.DataFrame(np.column_stack((X, Y, D1, D2)), + columns=[f'X{i + 1}' for i in np.arange(p)] + ['Y', 'D1', 'D2']) + data_d1 = DoubleMLData(df, 'Y', 'D1') + data_d2 = DoubleMLData(df, 'Y', 'D2') + + return data_d1, data_d2 + + @pytest.fixture(scope='session', params=[(500, 10), (1000, 20), diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py index adaccb75f..5b1be8bd5 100644 --- a/doubleml/tests/test_framework_sensitivity.py +++ b/doubleml/tests/test_framework_sensitivity.py @@ -1,6 +1,5 @@ import pytest -from doubleml.datasets import make_irm_data from doubleml.irm.irm import DoubleMLIRM from doubleml.double_ml_framework import concat @@ -14,8 +13,8 @@ def n_rep(request): @pytest.fixture(scope='module') -def dml_framework_sensitivity_fixture(n_rep): - dml_data = make_irm_data() +def dml_framework_sensitivity_fixture(n_rep, generate_data_simple): + dml_data, dml_data_2 = generate_data_simple ml_g = LinearRegression() ml_m = LogisticRegression() @@ -29,7 +28,6 @@ def dml_framework_sensitivity_fixture(n_rep): dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj # substract objects - dml_data_2 = make_irm_data() dml_irm_obj_2 = DoubleMLIRM(dml_data_2, ml_g, ml_m, n_rep=n_rep) dml_irm_obj_2.fit() dml_framework_obj_2 = dml_irm_obj_2.construct_framework() From 2fd1434a22f94fbb434be7a476a7a064c75788f8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:00:04 +0200 Subject: [PATCH 29/38] add rv and senstivity analysis --- doubleml/double_ml_framework.py | 62 ++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 5dde4dedc..da02eb979 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -2,6 +2,7 @@ import pandas as pd from scipy.stats import norm +from scipy.optimize import minimize_scalar from statsmodels.stats.multitest import multipletests from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est @@ -462,9 +463,21 @@ def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): _check_float(null_hypothesis, "null_hypothesis") _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._n_thetas-1) - cf_y = 0.03 - cf_d = 0.03 - sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) + # check which side is relvant + bound = 'upper' if (null_hypothesis > self.thetas[idx_treatment]) else 'lower' + + # minimize the square to find boundary solutions + def rv_fct(value, param): + res = self._calc_sensitivity_analysis(cf_y=value, + cf_d=value, + rho=rho, + level=level)[param][bound][idx_treatment] - null_hypothesis + return np.square(res) + + rv = minimize_scalar(rv_fct, bounds=(0, 0.9999), method='bounded', args=('theta', )).x + rva = minimize_scalar(rv_fct, bounds=(0, 0.9999), method='bounded', args=('ci', )).x + + return rv, rva def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_hypothesis=0.0): """ @@ -502,9 +515,49 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h ------- self : object """ + + # input checks + if isinstance(null_hypothesis, float): + null_hypothesis_vec = np.full(shape=self._n_thetas, fill_value=null_hypothesis) + elif isinstance(null_hypothesis, 
np.ndarray): + if null_hypothesis.shape == (self._n_thetas,): + null_hypothesis_vec = null_hypothesis + else: + raise ValueError("null_hypothesis is numpy.ndarray but does not have the required " + f"shape ({self._n_thetas},). " + f'Array of shape {str(null_hypothesis.shape)} was passed.') + else: + raise TypeError("null_hypothesis has to be of type float or np.ndarry. " + f"{str(null_hypothesis)} of type {str(type(null_hypothesis))} was passed.") + # compute sensitivity analysis sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) - _ = self._calc_robustness_value(null_hypothesis=null_hypothesis, level=level, rho=rho, idx_treatment=0) + + # compute robustess values with respect to null_hypothesis + rv = np.full(shape=self._n_thetas, fill_value=np.nan) + rva = np.full(shape=self._n_thetas, fill_value=np.nan) + + for i_theta in range(self._n_thetas): + rv[i_theta], rva[i_theta] = self._calc_robustness_value( + null_hypothesis=null_hypothesis_vec[i_theta], + level=level, + rho=rho, + idx_treatment=i_theta + ) + + sensitivity_dict['rv'] = rv + sensitivity_dict['rva'] = rva + + # add all input parameters + input_params = {'cf_y': cf_y, + 'cf_d': cf_d, + 'rho': rho, + 'level': level, + 'null_hypothesis': null_hypothesis_vec} + sensitivity_dict['input'] = input_params + + self._sensitivity_params = sensitivity_dict + return self def confint(self, joint=False, level=0.95): """ @@ -719,6 +772,7 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict): self._sensitivity_implemented = sensitivity_implemented self._sensitivity_elements = sensitivity_elements + self._sensitivity_params = None return From 78deba686f9910857aa16f8267e978e26eb2acbb Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:33:40 +0200 Subject: [PATCH 30/38] add hypothesis exception test --- doubleml/double_ml_framework.py | 3 +-- doubleml/tests/test_framework_exceptions.py | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index da02eb979..5ba4d2ed6 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -515,8 +515,7 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h ------- self : object """ - - # input checks + # check null_hypothesis if isinstance(null_hypothesis, float): null_hypothesis_vec = np.full(shape=self._n_thetas, fill_value=null_hypothesis) elif isinstance(null_hypothesis, np.ndarray): diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 43e906fff..44a062e1c 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -343,6 +343,12 @@ def test_sensitivity_exceptions(): _ = dml_framework_obj_1._calc_robustness_value(rho=1.0, level=0.0, null_hypothesis=0.0, idx_treatment=0) # test null_hypothesis + msg = "null_hypothesis has to be of type float or np.ndarry. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=1) + msg = r"null_hypothesis is numpy.ndarray but does not have the required shape \(2,\). Array of shape \(3,\) was passed." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_analysis(null_hypothesis=np.array([1, 2, 3])) msg = "null_hypothesis must be of float type. 1 of type was passed." 
with pytest.raises(TypeError, match=msg): _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=1, level=0.95, rho=1.0, idx_treatment=0) From d892bb5c8639b5e8d5b08089e07c8b4b585d21c8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:21:33 +0200 Subject: [PATCH 31/38] move sensitivity_analysis() to framework --- doubleml/double_ml.py | 61 ++++++------------- .../_utils_doubleml_sensitivity_manual.py | 8 ++- doubleml/tests/test_exceptions.py | 3 +- doubleml/tests/test_model_defaults.py | 2 +- 4 files changed, 29 insertions(+), 45 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index be7bcc464..bf38f29f7 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -344,7 +344,11 @@ def sensitivity_params(self): If available (e.g., PLR, IRM) a dictionary with entries ``theta``, ``se``, ``ci``, ``rv`` and ``rva``. """ - return self._sensitivity_params + if self._framework is None: + sensitivity_params = None + else: + sensitivity_params = self._framework.sensitivity_params + return sensitivity_params @property def coef(self): @@ -1559,42 +1563,17 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h ------- self : object """ - # compute sensitivity analysis - sensitivity_dict = self._calc_sensitivity_analysis(cf_y=cf_y, cf_d=cf_d, rho=rho, level=level) - - if isinstance(null_hypothesis, float): - null_hypothesis_vec = np.full(shape=self._dml_data.n_treat, fill_value=null_hypothesis) - elif isinstance(null_hypothesis, np.ndarray): - if null_hypothesis.shape == (self._dml_data.n_treat,): - null_hypothesis_vec = null_hypothesis - else: - raise ValueError("null_hypothesis is numpy.ndarray but does not have the required " - f"shape ({self._dml_data.n_treat},). " - f'Array of shape {str(null_hypothesis.shape)} was passed.') - else: - raise TypeError("null_hypothesis has to be of type float or np.ndarry. 
" - f"{str(null_hypothesis)} of type {str(type(null_hypothesis))} was passed.") - # compute robustess values with respect to null_hypothesis - rv = np.full(shape=self._dml_data.n_treat, fill_value=np.nan) - rva = np.full(shape=self._dml_data.n_treat, fill_value=np.nan) - - for i_treat in range(self._dml_data.n_treat): - rv[i_treat], rva[i_treat] = self._calc_robustness_value(null_hypothesis=null_hypothesis_vec[i_treat], - level=level, rho=rho, idx_treatment=i_treat) - - sensitivity_dict['rv'] = rv - sensitivity_dict['rva'] = rva - - # add all input parameters - input_params = {'cf_y': cf_y, - 'cf_d': cf_d, - 'rho': rho, - 'level': level, - 'null_hypothesis': null_hypothesis_vec} - sensitivity_dict['input'] = input_params + if self._framework is None: + raise ValueError('Apply fit() before bootstrap().') + self._framework.sensitivity_analysis( + cf_y=cf_y, + cf_d=cf_d, + rho=rho, + level=level, + null_hypothesis=null_hypothesis + ) - self._sensitivity_params = sensitivity_dict return self @property @@ -1617,19 +1596,19 @@ def sensitivity_summary(self): f'rho={self.sensitivity_params["input"]["rho"]}' theta_and_ci_col_names = ['CI lower', 'theta lower', ' theta', 'theta upper', 'CI upper'] - theta_and_ci = np.transpose(np.vstack((self._sensitivity_params['ci']['lower'], - self._sensitivity_params['theta']['lower'], + theta_and_ci = np.transpose(np.vstack((self.sensitivity_params['ci']['lower'], + self.sensitivity_params['theta']['lower'], self.coef, - self._sensitivity_params['theta']['upper'], - self._sensitivity_params['ci']['upper']))) + self.sensitivity_params['theta']['upper'], + self.sensitivity_params['ci']['upper']))) df_theta_and_ci = pd.DataFrame(theta_and_ci, columns=theta_and_ci_col_names, index=self._dml_data.d_cols) theta_and_ci_summary = str(df_theta_and_ci) rvs_col_names = ['H_0', 'RV (%)', 'RVa (%)'] - rvs = np.transpose(np.vstack((self._sensitivity_params['rv'], - self._sensitivity_params['rva']))) * 100 + rvs = np.transpose(np.vstack((self.sensitivity_params['rv'], + self.sensitivity_params['rva']))) * 100 df_rvs = pd.DataFrame(np.column_stack((self.sensitivity_params["input"]["null_hypothesis"], rvs)), columns=rvs_col_names, diff --git a/doubleml/tests/_utils_doubleml_sensitivity_manual.py b/doubleml/tests/_utils_doubleml_sensitivity_manual.py index a5de4ae6a..c20e2e4bc 100644 --- a/doubleml/tests/_utils_doubleml_sensitivity_manual.py +++ b/doubleml/tests/_utils_doubleml_sensitivity_manual.py @@ -35,8 +35,12 @@ def doubleml_sensitivity_manual(sensitivity_elements, all_coefs, psi, psi_deriv, theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, var_scaling_factor) quant = norm.ppf(level) - ci_lower = theta_lower - np.multiply(quant, sigma_lower) - ci_upper = theta_upper + np.multiply(quant, sigma_upper) + + all_ci_lower = all_theta_lower - np.multiply(quant, all_sigma_lower) + all_ci_upper = all_theta_upper + np.multiply(quant, all_sigma_upper) + + ci_lower = np.median(all_ci_lower, axis=1) + ci_upper = np.median(all_ci_upper, axis=1) theta_dict = {'lower': theta_lower, 'upper': theta_upper} diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 57ebba8f3..ef768143a 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -972,10 +972,11 @@ def test_doubleml_sensitivity_not_yet_implemented(): dml_pliv = DoubleMLPLIV(dml_data_pliv, ml_g, ml_m, ml_r) dml_pliv.fit() - msg = "Sensitivity analysis not yet implemented for DoubleMLPLIV." 
+ msg = 'Sensitivity analysis is not implemented for this model.' with pytest.raises(NotImplementedError, match=msg): _ = dml_pliv.sensitivity_analysis() + msg = 'Sensitivity analysis not yet implemented for DoubleMLPLIV.' with pytest.raises(NotImplementedError, match=msg): _ = dml_pliv.sensitivity_benchmark(benchmarking_set=["X1"]) diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py index 4c3a2f61e..5da2fbcaf 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -195,7 +195,7 @@ def test_sensitivity_defaults(): 'null_hypothesis': np.array([0.])} dml_plr.sensitivity_analysis() - assert dml_plr._sensitivity_params['input'] == input_dict + assert dml_plr.sensitivity_params['input'] == input_dict @pytest.mark.ci From e492c6bb13558fc48eba427b8ebf7d3ec845967e Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:52:09 +0200 Subject: [PATCH 32/38] move _calc_sensitivity_analysis and _calc_robustness_value to framework --- doubleml/double_ml.py | 137 +++------------------------- doubleml/tests/test_exceptions.py | 44 --------- doubleml/tests/test_return_types.py | 16 ++-- 3 files changed, 21 insertions(+), 176 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index bf38f29f7..c143c0025 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1411,122 +1411,6 @@ def _set_sensitivity_elements(self, sensitivity_elements, i_rep, i_treat): self.sensitivity_elements[key][:, i_rep, i_treat] = sensitivity_elements[key] return - def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level): - if self._sensitivity_elements is None: - raise NotImplementedError(f'Sensitivity analysis not yet implemented for {self.__class__.__name__}.') - - # checks - _check_in_zero_one(cf_y, 'cf_y', include_one=False) - _check_in_zero_one(cf_d, 'cf_d', include_one=False) - if not isinstance(rho, float): - raise TypeError(f'rho must be of float type. ' - f'{str(rho)} of type {str(type(rho))} was passed.') - _check_in_zero_one(abs(rho), 'The absolute value of rho') - _check_in_zero_one(level, 'The confidence level', include_zero=False, include_one=False) - - # set elements for readability - sigma2 = self.sensitivity_elements['sigma2'] - nu2 = self.sensitivity_elements['nu2'] - psi_sigma = self.sensitivity_elements['psi_sigma2'] - psi_nu = self.sensitivity_elements['psi_nu2'] - psi_scaled = np.divide(self.psi, np.mean(self.psi_deriv, axis=0)) - - if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)): - raise ValueError('sensitivity_elements sigma2 and nu2 have to be positive. ' - f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. 
" - 'Most likely this is due to low quality learners (especially propensity scores).') - - # elementwise operations - confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0-cf_d)))) - S = np.sqrt(np.multiply(sigma2, nu2)) - - # sigma2 and nu2 are of shape (1, n_rep, n_coefs), whereas the all_coefs is of shape (n_coefs, n_reps) - all_theta_lower = self.all_coef - np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) - all_theta_upper = self.all_coef + np.multiply(np.transpose(np.squeeze(S, axis=0)), confounding_strength) - - psi_S2 = np.multiply(sigma2, psi_nu) + np.multiply(nu2, psi_sigma) - psi_bias = np.multiply(np.divide(confounding_strength, np.multiply(2.0, S)), psi_S2) - psi_lower = psi_scaled - psi_bias - psi_upper = psi_scaled + psi_bias - - # transpose to obtain shape (n_coefs, n_reps); includes scaling with n^{-1/2} - all_sigma_lower = np.full_like(all_theta_lower, fill_value=np.nan) - all_sigma_upper = np.full_like(all_theta_upper, fill_value=np.nan) - for i_rep in range(self.n_rep): - self._i_rep = i_rep - for i_d in range(self._dml_data.n_treat): - self._i_treat = i_d - - if not self._is_cluster_data: - cluster_vars = None - smpls_cluster = None - n_folds_per_cluster = None - else: - cluster_vars = self._dml_data.cluster_vars - smpls_cluster = self.__smpls_cluster - n_folds_per_cluster = self._n_folds_per_cluster - - sigma2_lower_hat, _ = _var_est(psi=psi_lower[:, i_rep, i_d], - psi_deriv=np.ones_like(psi_lower[:, i_rep, i_d]), - smpls=self.__smpls, - is_cluster_data=self._is_cluster_data, - cluster_vars=cluster_vars, - smpls_cluster=smpls_cluster, - n_folds_per_cluster=n_folds_per_cluster) - sigma2_upper_hat, _ = _var_est(psi=psi_upper[:, i_rep, i_d], - psi_deriv=np.ones_like(psi_upper[:, i_rep, i_d]), - smpls=self.__smpls, - is_cluster_data=self._is_cluster_data, - cluster_vars=cluster_vars, - smpls_cluster=smpls_cluster, - n_folds_per_cluster=n_folds_per_cluster) - - all_sigma_lower[self._i_treat, self._i_rep] = np.sqrt(sigma2_lower_hat) - all_sigma_upper[self._i_treat, self._i_rep] = np.sqrt(sigma2_upper_hat) - - # aggregate coefs and ses over n_rep - theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, self._var_scaling_factors) - theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, self._var_scaling_factors) - - quant = norm.ppf(level) - ci_lower = theta_lower - np.multiply(quant, sigma_lower) - ci_upper = theta_upper + np.multiply(quant, sigma_upper) - - theta_dict = {'lower': theta_lower, - 'upper': theta_upper} - - se_dict = {'lower': sigma_lower, - 'upper': sigma_upper} - - ci_dict = {'lower': ci_lower, - 'upper': ci_upper} - - res_dict = {'theta': theta_dict, - 'se': se_dict, - 'ci': ci_dict} - - return res_dict - - def _calc_robustness_value(self, null_hypothesis, level, rho, idx_treatment): - _check_float(null_hypothesis, "null_hypothesis") - _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._dml_data.n_treat-1) - - # check which side is relvant - bound = 'upper' if (null_hypothesis > self.coef[idx_treatment]) else 'lower' - - # minimize the square to find boundary solutions - def rv_fct(value, param): - res = self._calc_sensitivity_analysis(cf_y=value, - cf_d=value, - rho=rho, - level=level)[param][bound][idx_treatment] - null_hypothesis - return np.square(res) - - rv = minimize_scalar(rv_fct, bounds=(0, 0.9999), method='bounded', args=('theta', )).x - rva = minimize_scalar(rv_fct, bounds=(0, 0.9999), 
method='bounded', args=('ci', )).x - - return rv, rva - def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_hypothesis=0.0): """ Performs a sensitivity analysis to account for unobserved confounders. @@ -1697,10 +1581,13 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True contour_values = np.full(shape=(grid_size, grid_size), fill_value=np.nan) for i_cf_d_grid, cf_d_grid in enumerate(cf_d_vec): for i_cf_y_grid, cf_y_grid in enumerate(cf_y_vec): - sens_dict = self._calc_sensitivity_analysis(cf_y=cf_y_grid, - cf_d=cf_d_grid, - rho=self.sensitivity_params['input']['rho'], - level=self.sensitivity_params['input']['level']) + + sens_dict = self.framework._calc_sensitivity_analysis( + cf_y=cf_y_grid, + cf_d=cf_d_grid, + rho=self.sensitivity_params['input']['rho'], + level=self.sensitivity_params['input']['level'] + ) contour_values[i_cf_d_grid, i_cf_y_grid] = sens_dict[value][bound][idx_treatment] # get the correct unadjusted value for confidence bands @@ -1720,10 +1607,12 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True n_benchmarks = len(benchmarks['name']) benchmark_values = np.full(shape=(n_benchmarks,), fill_value=np.nan) for benchmark_idx in range(len(benchmarks['name'])): - sens_dict_bench = self._calc_sensitivity_analysis(cf_y=benchmarks['cf_y'][benchmark_idx], - cf_d=benchmarks['cf_y'][benchmark_idx], - rho=self.sensitivity_params['input']['rho'], - level=self.sensitivity_params['input']['level']) + sens_dict_bench = self.framework._calc_sensitivity_analysis( + cf_y=benchmarks['cf_y'][benchmark_idx], + cf_d=benchmarks['cf_y'][benchmark_idx], + rho=self.sensitivity_params['input']['rho'], + level=self.sensitivity_params['input']['level'] + ) benchmark_values[benchmark_idx] = sens_dict_bench[value][bound][idx_treatment] benchmark_dict['value'] = benchmark_values fig = _sensitivity_contour_plot(x=cf_d_vec, diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index ef768143a..7d4f2b2f5 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -990,77 +990,45 @@ def test_doubleml_sensitivity_inputs(): msg = "cf_y must be of float type. 1 of type was passed." with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=1) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=1, cf_d=0.03, rho=1.0, level=0.95) msg = r'cf_y must be in \[0,1\). 1.0 was passed.' with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=1.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=1.0, cf_d=0.03, rho=1.0, level=0.95) # test cf_d msg = "cf_d must be of float type. 1 of type was passed." with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=1) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=1, rho=1.0, level=0.95) msg = r'cf_d must be in \[0,1\). 1.0 was passed.' with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=1.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=1.0, rho=1.0, level=0.95) # test rho msg = "rho must be of float type. 1 of type was passed." 
with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1, level=0.95) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(rho=1, null_hypothesis=0.0, level=0.95, idx_treatment=0) msg = "rho must be of float type. 1 of type was passed." with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho="1") - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho="1", level=0.95) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(rho="1", null_hypothesis=0.0, level=0.95, idx_treatment=0) msg = r'The absolute value of rho must be in \[0,1\]. 1.1 was passed.' with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.1, level=0.95) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_robustness_value(rho=1.1, null_hypothesis=0.0, level=0.95, idx_treatment=0) # test level msg = "The confidence level must be of float type. 1 of type was passed." with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1) - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(rho=1.0, level=1, null_hypothesis=0.0, idx_treatment=0) msg = r'The confidence level must be in \(0,1\). 1.0 was passed.' with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=1.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_robustness_value(rho=1.0, level=1.0, null_hypothesis=0.0, idx_treatment=0) msg = r'The confidence level must be in \(0,1\). 0.0 was passed.' with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.0) - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_robustness_value(rho=1.0, level=0.0, null_hypothesis=0.0, idx_treatment=0) # test null_hypothesis msg = "null_hypothesis has to be of type float or np.ndarry. 1 of type was passed." @@ -1069,30 +1037,18 @@ def test_doubleml_sensitivity_inputs(): msg = r"null_hypothesis is numpy.ndarray but does not have the required shape \(1,\). Array of shape \(2,\) was passed." with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_analysis(null_hypothesis=np.array([1, 2])) - msg = "null_hypothesis must be of float type. 1 of type was passed." - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(null_hypothesis=1, level=0.95, rho=1.0, idx_treatment=0) - msg = r"null_hypothesis must be of float type. \[1\] of type was passed." - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0) # test idx_treatment dml_irm.sensitivity_analysis() msg = "idx_treatment must be an integer. 
0.0 of type was passed." - with pytest.raises(TypeError, match=msg): - _ = dml_irm._calc_robustness_value(idx_treatment=0.0, null_hypothesis=0.0, level=0.95, rho=1.0) with pytest.raises(TypeError, match=msg): _ = dml_irm.sensitivity_plot(idx_treatment=0.0) msg = "idx_treatment must be larger or equal to 0. -1 was passed." - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_robustness_value(idx_treatment=-1, null_hypothesis=0.0, level=0.95, rho=1.0) with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_plot(idx_treatment=-1) msg = "idx_treatment must be smaller or equal to 0. 1 was passed." - with pytest.raises(ValueError, match=msg): - _ = dml_irm._calc_robustness_value(idx_treatment=1, null_hypothesis=0.0, level=0.95, rho=1.0) with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_plot(idx_treatment=1) diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index 975c203e7..d5f39d3d8 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -416,8 +416,8 @@ def test_sensitivity(): assert isinstance(plr_obj.sensitivity_summary, str) assert isinstance(plr_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(plr_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(plr_obj._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(plr_obj._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance(plr_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) + assert isinstance(plr_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) plr_benchmark = plr_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(plr_benchmark, pd.DataFrame) @@ -435,8 +435,8 @@ def test_sensitivity(): assert isinstance(irm_obj.sensitivity_summary, str) assert isinstance(irm_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(irm_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(irm_obj._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(irm_obj._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance(irm_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) + assert isinstance(irm_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) irm_benchmark = irm_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(irm_benchmark, pd.DataFrame) @@ -454,8 +454,8 @@ def test_sensitivity(): assert isinstance(did_obj.sensitivity_summary, str) assert isinstance(did_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(did_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(did_obj._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(did_obj._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance(did_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) + assert isinstance(did_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), 
tuple) did_benchmark = did_obj.sensitivity_benchmark(benchmarking_set=['Z1']) assert isinstance(did_benchmark, pd.DataFrame) @@ -473,8 +473,8 @@ def test_sensitivity(): assert isinstance(did_cs_obj.sensitivity_summary, str) assert isinstance(did_cs_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(did_cs_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) - assert isinstance(did_cs_obj._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(did_cs_obj._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance(did_cs_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) + assert isinstance(did_cs_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) did_cs_benchmark = did_cs_obj.sensitivity_benchmark(benchmarking_set=['Z1']) assert isinstance(did_cs_benchmark, pd.DataFrame) From beac0c9cc16edfaf0fa55b0fa8a870018482d388 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:54:58 +0200 Subject: [PATCH 33/38] fix format --- doubleml/double_ml.py | 3 +-- doubleml/tests/test_return_types.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c143c0025..3c47eff8e 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -8,14 +8,13 @@ from scipy.stats import norm from abc import ABC, abstractmethod -from scipy.optimize import minimize_scalar from .double_ml_data import DoubleMLBaseData, DoubleMLClusterData from .double_ml_framework import DoubleMLFramework from .utils.resampling import DoubleMLResampling, DoubleMLClusterResampling from .utils._estimation import _rmse, _aggregate_coefs_and_ses, _var_est, _set_external_predictions -from .utils._checks import _check_in_zero_one, _check_integer, _check_float, _check_bool, _check_is_partition, \ +from .utils._checks import _check_in_zero_one, _check_integer, _check_bool, _check_is_partition, \ _check_all_smpls, _check_smpl_split, _check_smpl_split_tpl, _check_benchmarks, _check_external_predictions from .utils._plots import _sensitivity_contour_plot from .utils.gain_statistics import gain_statistics diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py index d5f39d3d8..a7d2b5b56 100644 --- a/doubleml/tests/test_return_types.py +++ b/doubleml/tests/test_return_types.py @@ -417,7 +417,9 @@ def test_sensitivity(): assert isinstance(plr_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(plr_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) assert isinstance(plr_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(plr_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance( + plr_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), + tuple) plr_benchmark = plr_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(plr_benchmark, pd.DataFrame) @@ -436,7 +438,10 @@ def test_sensitivity(): assert isinstance(irm_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(irm_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) 
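    # Sketch of the container layouts checked here (illustrative only):
    #   sens = irm_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
    #   sens['theta']['lower'], sens['theta']['upper']   # bias-adjusted bounds; 'se' and 'ci' use the same lower/upper layout
    #   rv, rva = irm_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0)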
assert isinstance(irm_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(irm_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance( + irm_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), + tuple + ) irm_benchmark = irm_obj.sensitivity_benchmark(benchmarking_set=["X1"]) assert isinstance(irm_benchmark, pd.DataFrame) @@ -455,7 +460,10 @@ def test_sensitivity(): assert isinstance(did_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(did_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) assert isinstance(did_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(did_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance( + did_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), + tuple + ) did_benchmark = did_obj.sensitivity_benchmark(benchmarking_set=['Z1']) assert isinstance(did_benchmark, pd.DataFrame) @@ -474,7 +482,10 @@ def test_sensitivity(): assert isinstance(did_cs_obj.sensitivity_plot(), plotly.graph_objs._figure.Figure) assert isinstance(did_cs_obj.sensitivity_plot(value='ci', benchmarks=benchmarks), plotly.graph_objs._figure.Figure) assert isinstance(did_cs_obj.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95), dict) - assert isinstance(did_cs_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), tuple) + assert isinstance( + did_cs_obj.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0), + tuple + ) did_cs_benchmark = did_cs_obj.sensitivity_benchmark(benchmarking_set=['Z1']) assert isinstance(did_cs_benchmark, pd.DataFrame) From 32fa7b8901c8ece25be40470ff18aa7769d409bd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:32:15 +0200 Subject: [PATCH 34/38] add plot to framework --- doubleml/double_ml_framework.py | 135 +++++++++++++++++++- doubleml/tests/test_framework_exceptions.py | 85 ++++++++++++ 2 files changed, 219 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 5ba4d2ed6..2ab3d1702 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import copy from scipy.stats import norm from scipy.optimize import minimize_scalar @@ -7,7 +8,8 @@ from .utils._estimation import _draw_weights, _aggregate_coefs_and_ses, _var_est from .utils._checks import _check_bootstrap, _check_framework_compatibility, _check_in_zero_one, \ - _check_float, _check_integer, _check_bool + _check_float, _check_integer, _check_bool, _check_benchmarks +from .utils._plots import _sensitivity_contour_plot class DoubleMLFramework(): @@ -719,6 +721,137 @@ def p_adjust(self, method='romano-wolf'): return df_p_vals, all_p_vals_corrected + def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0, + include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100): + """ + Contour plot of the sensivity with respect to latent/confounding variables. 
+ + Parameters + ---------- + idx_treatment : int + Index of the treatment to perform the sensitivity analysis. + Default is ``0``. + + value : str + Determines which contours to plot. Valid values are ``'theta'`` (refers to the bounds) + and ``'ci'`` (refers to the bounds including statistical uncertainty). + Default is ``'theta'``. + + rho: float + The correlation between the differences in short and long representations in the main regression and + Riesz representer. Has to be in [-1,1]. The absolute value determines the adversarial strength of the + confounding (maximizes at 1.0). + Default is ``1.0``. + + level : float + The confidence level. + Default is ``0.95``. + + include_scenario : bool + Indicates whether to highlight the scenario from the call of :meth:`sensitivity_analysis`. + Default is ``True``. + + null_hypothesis : float + Null hypothesis for the effect. Determines the direction of the contour lines. + + benchmarks : dict or None + Dictionary of benchmarks to be included in the plot. The keys are ``cf_y``, ``cf_d`` and ``name``. + Default is ``None``. + + fill : bool + Indicates whether to use a heatmap style or only contour lines. + Default is ``True``. + + grid_bounds : tuple + Determines the evaluation bounds of the grid for ``cf_d`` and ``cf_y``. Has to contain two floats in [0, 1). + Default is ``(0.15, 0.15)``. + + grid_size : int + Determines the number of evaluation points of the grid. + Default is ``100``. + + Returns + ------- + fig : object + Plotly figure of the sensitivity contours. + """ + _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self.n_thetas-1) + if not isinstance(value, str): + raise TypeError('value must be a string. ' + f'{str(value)} of type {type(value)} was passed.') + valid_values = ['theta', 'ci'] + if value not in valid_values: + raise ValueError('Invalid value ' + value + '. ' + + 'Valid values ' + ' or '.join(valid_values) + '.') + _check_float(null_hypothesis, "null_hypothesis") + _check_bool(include_scenario, 'include_scenario') + if include_scenario and self.sensitivity_params is None: + raise ValueError('Apply sensitivity_analysis() to include senario in sensitivity_plot. 
') + _check_benchmarks(benchmarks) + _check_bool(fill, 'fill') + _check_in_zero_one(grid_bounds[0], "grid_bounds", include_zero=False, include_one=False) + _check_in_zero_one(grid_bounds[1], "grid_bounds", include_zero=False, include_one=False) + _check_integer(grid_size, "grid_size", lower_bound=10) + + null_hypothesis = self.sensitivity_params['input']['null_hypothesis'][idx_treatment] + unadjusted_theta = self.thetas[idx_treatment] + # check which side is relvant + bound = 'upper' if (null_hypothesis > unadjusted_theta) else 'lower' + + # create evaluation grid + cf_d_vec = np.linspace(0, grid_bounds[0], grid_size) + cf_y_vec = np.linspace(0, grid_bounds[1], grid_size) + + # compute contour values + contour_values = np.full(shape=(grid_size, grid_size), fill_value=np.nan) + for i_cf_d_grid, cf_d_grid in enumerate(cf_d_vec): + for i_cf_y_grid, cf_y_grid in enumerate(cf_y_vec): + + sens_dict = self._calc_sensitivity_analysis( + cf_y=cf_y_grid, + cf_d=cf_d_grid, + rho=rho, + level=level, + ) + contour_values[i_cf_d_grid, i_cf_y_grid] = sens_dict[value][bound][idx_treatment] + + # get the correct unadjusted value for confidence bands + if value == 'theta': + unadjusted_value = unadjusted_theta + else: + assert value == 'ci' + ci = self.confint(level=self.sensitivity_params['input']['level']) + if bound == 'upper': + unadjusted_value = ci.iloc[idx_treatment, 1] + else: + unadjusted_value = ci.iloc[idx_treatment, 0] + + # compute the values for the benchmarks + benchmark_dict = copy.deepcopy(benchmarks) + if benchmarks is not None: + n_benchmarks = len(benchmarks['name']) + benchmark_values = np.full(shape=(n_benchmarks,), fill_value=np.nan) + for benchmark_idx in range(len(benchmarks['name'])): + sens_dict_bench = self._calc_sensitivity_analysis( + cf_y=benchmarks['cf_y'][benchmark_idx], + cf_d=benchmarks['cf_y'][benchmark_idx], + rho=self.sensitivity_params['input']['rho'], + level=self.sensitivity_params['input']['level'] + ) + benchmark_values[benchmark_idx] = sens_dict_bench[value][bound][idx_treatment] + benchmark_dict['value'] = benchmark_values + fig = _sensitivity_contour_plot(x=cf_d_vec, + y=cf_y_vec, + contour_values=contour_values, + unadjusted_value=unadjusted_value, + scenario_x=self.sensitivity_params['input']['cf_d'], + scenario_y=self.sensitivity_params['input']['cf_y'], + scenario_value=self.sensitivity_params[value][bound][idx_treatment], + include_scenario=include_scenario, + benchmarks=benchmark_dict, + fill=fill) + return fig + def _check_and_set_cluster_data(self, doubleml_dict): self._cluster_dict = None diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 44a062e1c..33813bc78 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -366,6 +366,20 @@ def test_sensitivity_exceptions(): } dml_framework_sensitivity = DoubleMLFramework(sensitivity_dict) + # test idx_treatment + dml_framework_obj_1.sensitivity_analysis() + msg = "idx_treatment must be an integer. 0.0 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=0.0) + + msg = "idx_treatment must be larger or equal to 0. -1 was passed." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=-1) + + msg = "idx_treatment must be smaller or equal to 1. 2 was passed." 
+ with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=2) + # test variances msg = ( r'sensitivity_elements sigma2 and nu2 have to be positive\. ' @@ -377,3 +391,74 @@ def test_sensitivity_exceptions(): _ = dml_framework_sensitivity._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95) with pytest.raises(ValueError, match=msg): dml_framework_sensitivity.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.95) + + +@pytest.mark.ci +def test_framework_sensitivity_plot_input(): + dml_framework_obj_plot = DoubleMLFramework(doubleml_dict) + + msg = (r'Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. ') + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot() + + dml_framework_obj_plot.sensitivity_analysis() + msg = "null_hypothesis must be of float type. 1 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(null_hypothesis=1) + + msg = "include_scenario has to be boolean. True of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(include_scenario="True") + + msg = "benchmarks has to be either None or a dictionary. True of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks="True") + msg = r"benchmarks has to be a dictionary with keys cf_y, cf_d and name. Got dict_keys\(\['cf_y', 'cf_d'\]\)." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': 0.1, 'cf_d': 0.15}) + msg = r"benchmarks has to be a dictionary with values of same length. Got \[1, 2, 2\]." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + msg = "benchmarks cf_y must be of float type. 2 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 2], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + msg = r'benchmarks cf_y must be in \[0,1\). 1.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 1.0], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + msg = "benchmarks name must be of string type. 2 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': [2, 2]}) + + msg = "value must be a string. 2 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(value=2) + msg = "Invalid value test. Valid values theta or ci." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(value='test') + + msg = "fill has to be boolean. True of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(fill="True") + + msg = "grid_size must be an integer. 0.0 of type was passed." + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_size=0.0) + msg = "grid_size must be larger or equal to 10. 9 was passed." + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_size=9) + + msg = "grid_bounds must be of float type. 1 of type was passed." 
+ with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 1)) + with pytest.raises(TypeError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(1, 0.15)) + msg = r'grid_bounds must be in \(0,1\). 1.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(1.0, 0.15)) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 1.0)) + msg = r'grid_bounds must be in \(0,1\). 0.0 was passed.' + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.0, 0.15)) + with pytest.raises(ValueError, match=msg): + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 0.0)) \ No newline at end of file From 0db982bdfc73f29fabaa23c7f0cc27d8aa62641c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:32:50 +0200 Subject: [PATCH 35/38] fix format --- doubleml/tests/test_framework_exceptions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index 33813bc78..7dc8849b2 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -418,16 +418,20 @@ def test_framework_sensitivity_plot_input(): _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': 0.1, 'cf_d': 0.15}) msg = r"benchmarks has to be a dictionary with values of same length. Got \[1, 2, 2\]." with pytest.raises(ValueError, match=msg): - _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1], 'cf_d': [0.15, 0.2], + 'name': ['test', 'test2']}) msg = "benchmarks cf_y must be of float type. 2 of type was passed." with pytest.raises(TypeError, match=msg): - _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 2], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 2], 'cf_d': [0.15, 0.2], + 'name': ['test', 'test2']}) msg = r'benchmarks cf_y must be in \[0,1\). 1.0 was passed.' with pytest.raises(ValueError, match=msg): - _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 1.0], 'cf_d': [0.15, 0.2], 'name': ['test', 'test2']}) + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 1.0], 'cf_d': [0.15, 0.2], + 'name': ['test', 'test2']}) msg = "benchmarks name must be of string type. 2 of type was passed." with pytest.raises(TypeError, match=msg): - _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': [2, 2]}) + _ = dml_framework_obj_plot.sensitivity_plot(benchmarks={'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], + 'name': [2, 2]}) msg = "value must be a string. 2 of type was passed." 
with pytest.raises(TypeError, match=msg): @@ -461,4 +465,4 @@ def test_framework_sensitivity_plot_input(): with pytest.raises(ValueError, match=msg): _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.0, 0.15)) with pytest.raises(ValueError, match=msg): - _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 0.0)) \ No newline at end of file + _ = dml_framework_obj_plot.sensitivity_plot(grid_bounds=(0.15, 0.0)) From 2feb34a7c3b1023b7f225cd7c9704fafc692dacb Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:50:37 +0200 Subject: [PATCH 36/38] move sensivitiy_plot to framework --- doubleml/double_ml.py | 114 +++++++++--------------------- doubleml/tests/test_exceptions.py | 3 +- 2 files changed, 34 insertions(+), 83 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 3c47eff8e..f9be56d5d 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -14,9 +14,8 @@ from .utils.resampling import DoubleMLResampling, DoubleMLClusterResampling from .utils._estimation import _rmse, _aggregate_coefs_and_ses, _var_est, _set_external_predictions -from .utils._checks import _check_in_zero_one, _check_integer, _check_bool, _check_is_partition, \ - _check_all_smpls, _check_smpl_split, _check_smpl_split_tpl, _check_benchmarks, _check_external_predictions -from .utils._plots import _sensitivity_contour_plot +from .utils._checks import _check_is_partition, _check_all_smpls, _check_smpl_split, _check_smpl_split_tpl, \ + _check_external_predictions from .utils.gain_statistics import gain_statistics _implemented_data_backends = ['DoubleMLData', 'DoubleMLClusterData'] @@ -1448,7 +1447,7 @@ def sensitivity_analysis(self, cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95, null_h """ if self._framework is None: - raise ValueError('Apply fit() before bootstrap().') + raise ValueError('Apply fit() before sensitivity_analysis().') self._framework.sensitivity_analysis( cf_y=cf_y, cf_d=cf_d, @@ -1508,8 +1507,8 @@ def sensitivity_summary(self): return res - def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True, benchmarks=None, - fill=True, grid_bounds=(0.15, 0.15), grid_size=100): + def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, null_hypothesis=0.0, + include_scenario=True, benchmarks=None, fill=True, grid_bounds=(0.15, 0.15), grid_size=100): """ Contour plot of the sensivity with respect to latent/confounding variables. @@ -1524,10 +1523,23 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True and ``'ci'`` (refers to the bounds including statistical uncertainty). Default is ``'theta'``. + rho: float + The correlation between the differences in short and long representations in the main regression and + Riesz representer. Has to be in [-1,1]. The absolute value determines the adversarial strength of the + confounding (maximizes at 1.0). + Default is ``1.0``. + + level : float + The confidence level. + Default is ``0.95``. + include_scenario : bool Indicates whether to highlight the scenario from the call of :meth:`sensitivity_analysis`. Default is ``True``. + null_hypothesis : float + Null hypothesis for the effect. Determines the direction of the contour lines. + benchmarks : dict or None Dictionary of benchmarks to be included in the plot. The keys are ``cf_y``, ``cf_d`` and ``name``. Default is ``None``. 
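For orientation, a minimal sketch of a call using the keywords documented above; ``dml_obj`` stands for any fitted model on which ``sensitivity_analysis()`` has already been run, and the benchmark names are placeholders.

    benchmarks = {'cf_y': [0.04, 0.03], 'cf_d': [0.01, 0.02], 'name': ['X1', 'X2']}
    fig = dml_obj.sensitivity_plot(idx_treatment=0, value='ci', rho=1.0, level=0.95,
                                   null_hypothesis=0.0, benchmarks=benchmarks,
                                   grid_bounds=(0.1, 0.1), grid_size=50)
    fig.show()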
@@ -1549,81 +1561,21 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', include_scenario=True fig : object Plotly figure of the sensitivity contours. """ - if self.sensitivity_params is None: - raise ValueError('Apply sensitivity_analysis() to include senario in sensitivity_plot. ' - 'The values of rho and the level are used for the scenario.') - _check_integer(idx_treatment, "idx_treatment", lower_bound=0, upper_bound=self._dml_data.n_treat-1) - if not isinstance(value, str): - raise TypeError('value must be a string. ' - f'{str(value)} of type {type(value)} was passed.') - valid_values = ['theta', 'ci'] - if value not in valid_values: - raise ValueError('Invalid value ' + value + '. ' + - 'Valid values ' + ' or '.join(valid_values) + '.') - _check_bool(include_scenario, 'include_scenario') - _check_benchmarks(benchmarks) - _check_bool(fill, 'fill') - _check_in_zero_one(grid_bounds[0], "grid_bounds", include_zero=False, include_one=False) - _check_in_zero_one(grid_bounds[1], "grid_bounds", include_zero=False, include_one=False) - _check_integer(grid_size, "grid_size", lower_bound=10) - - null_hypothesis = self.sensitivity_params['input']['null_hypothesis'][idx_treatment] - unadjusted_theta = self.coef[idx_treatment] - # check which side is relvant - bound = 'upper' if (null_hypothesis > unadjusted_theta) else 'lower' - - # create evaluation grid - cf_d_vec = np.linspace(0, grid_bounds[0], grid_size) - cf_y_vec = np.linspace(0, grid_bounds[1], grid_size) - - # compute contour values - contour_values = np.full(shape=(grid_size, grid_size), fill_value=np.nan) - for i_cf_d_grid, cf_d_grid in enumerate(cf_d_vec): - for i_cf_y_grid, cf_y_grid in enumerate(cf_y_vec): - - sens_dict = self.framework._calc_sensitivity_analysis( - cf_y=cf_y_grid, - cf_d=cf_d_grid, - rho=self.sensitivity_params['input']['rho'], - level=self.sensitivity_params['input']['level'] - ) - contour_values[i_cf_d_grid, i_cf_y_grid] = sens_dict[value][bound][idx_treatment] - - # get the correct unadjusted value for confidence bands - if value == 'theta': - unadjusted_value = unadjusted_theta - else: - assert value == 'ci' - ci = self.confint(level=self.sensitivity_params['input']['level']) - if bound == 'upper': - unadjusted_value = ci.iloc[idx_treatment, 1] - else: - unadjusted_value = ci.iloc[idx_treatment, 0] - - # compute the values for the benchmarks - benchmark_dict = copy.deepcopy(benchmarks) - if benchmarks is not None: - n_benchmarks = len(benchmarks['name']) - benchmark_values = np.full(shape=(n_benchmarks,), fill_value=np.nan) - for benchmark_idx in range(len(benchmarks['name'])): - sens_dict_bench = self.framework._calc_sensitivity_analysis( - cf_y=benchmarks['cf_y'][benchmark_idx], - cf_d=benchmarks['cf_y'][benchmark_idx], - rho=self.sensitivity_params['input']['rho'], - level=self.sensitivity_params['input']['level'] - ) - benchmark_values[benchmark_idx] = sens_dict_bench[value][bound][idx_treatment] - benchmark_dict['value'] = benchmark_values - fig = _sensitivity_contour_plot(x=cf_d_vec, - y=cf_y_vec, - contour_values=contour_values, - unadjusted_value=unadjusted_value, - scenario_x=self.sensitivity_params['input']['cf_d'], - scenario_y=self.sensitivity_params['input']['cf_y'], - scenario_value=self.sensitivity_params[value][bound][idx_treatment], - include_scenario=include_scenario, - benchmarks=benchmark_dict, - fill=fill) + if self._framework is None: + raise ValueError('Apply fit() before sensitivity_plot().') + fig = self._framework.sensitivity_plot( + idx_treatment=idx_treatment, + value=value, 
+ rho=rho, + level=level, + null_hypothesis=null_hypothesis, + include_scenario=include_scenario, + benchmarks=benchmarks, + fill=fill, + grid_bounds=grid_bounds, + grid_size=grid_size + ) + return fig def sensitivity_benchmark(self, benchmarking_set, fit_args=None): diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 7d4f2b2f5..8c1d10fa4 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1097,8 +1097,7 @@ def test_doubleml_sensitivity_plot_input(): dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) dml_irm.fit() - msg = (r'Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. ' - 'The values of rho and the level are used for the scenario.') + msg = (r'Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. ') with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_plot() From 09ea568e4b6fb705799d9053657eeb04ba2b4841 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:57:55 +0200 Subject: [PATCH 37/38] update sensitivity_plot docstring --- doubleml/double_ml.py | 6 +++--- doubleml/double_ml_framework.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index f9be56d5d..46e18825f 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1533,13 +1533,13 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, The confidence level. Default is ``0.95``. + null_hypothesis : float + Null hypothesis for the effect. Determines the direction of the contour lines. + include_scenario : bool Indicates whether to highlight the scenario from the call of :meth:`sensitivity_analysis`. Default is ``True``. - null_hypothesis : float - Null hypothesis for the effect. Determines the direction of the contour lines. - benchmarks : dict or None Dictionary of benchmarks to be included in the plot. The keys are ``cf_y``, ``cf_d`` and ``name``. Default is ``None``. diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index 2ab3d1702..c528fef84 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -747,13 +747,13 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, The confidence level. Default is ``0.95``. + null_hypothesis : float + Null hypothesis for the effect. Determines the direction of the contour lines. + include_scenario : bool Indicates whether to highlight the scenario from the call of :meth:`sensitivity_analysis`. Default is ``True``. - null_hypothesis : float - Null hypothesis for the effect. Determines the direction of the contour lines. - benchmarks : dict or None Dictionary of benchmarks to be included in the plot. The keys are ``cf_y``, ``cf_d`` and ``name``. Default is ``None``. 
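Taken together, the relocated API can be exercised end to end roughly as follows. This is a sketch mirroring the test setup above (``make_irm_data`` and the learners are the ones used in the tests); it is not part of the patch itself.

    from sklearn.linear_model import Lasso, LogisticRegression
    from doubleml import DoubleMLIRM
    from doubleml.datasets import make_irm_data

    dml_data_irm = make_irm_data(n_obs=500)
    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
    dml_irm.fit()

    # the model-level call now delegates to the framework object
    dml_irm.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
    print(dml_irm.sensitivity_summary)

    # bounds follow theta -/+ |rho| * sqrt(cf_y * cf_d / (1 - cf_d)) * sqrt(sigma2 * nu2)
    sens = dml_irm.framework._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
    print(sens['theta']['lower'], sens['theta']['upper'])
    rv, rva = dml_irm.framework._calc_robustness_value(null_hypothesis=0.0, level=0.95, rho=1.0, idx_treatment=0)

    # contour plot with named benchmarks; cf_d is read from the 'cf_d' entry (see the fix below)
    benchmarks = {'cf_y': [0.1, 0.2], 'cf_d': [0.15, 0.2], 'name': ['X1', 'X2']}
    fig = dml_irm.sensitivity_plot(value='ci', benchmarks=benchmarks)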
From 399decfcaa2c43025961e4f3f13a194ff45e341c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 9 Aug 2024 07:43:18 +0200 Subject: [PATCH 38/38] fix benchmark in sensitivity plot --- doubleml/double_ml_framework.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py index c528fef84..feedee419 100644 --- a/doubleml/double_ml_framework.py +++ b/doubleml/double_ml_framework.py @@ -834,7 +834,7 @@ def sensitivity_plot(self, idx_treatment=0, value='theta', rho=1.0, level=0.95, for benchmark_idx in range(len(benchmarks['name'])): sens_dict_bench = self._calc_sensitivity_analysis( cf_y=benchmarks['cf_y'][benchmark_idx], - cf_d=benchmarks['cf_y'][benchmark_idx], + cf_d=benchmarks['cf_d'][benchmark_idx], rho=self.sensitivity_params['input']['rho'], level=self.sensitivity_params['input']['level'] )