py-why · nparent1 · Dec 30, 2024 · Dec 30, 2024 · Dec 30, 2024 · Dec 30, 2024
diff --git a/.../source/example_notebooks/dowhy_generalized_covariate_adjustment_estimation_example.ipynb b/.../source/example_notebooks/dowhy_generalized_covariate_adjustment_estimation_example.ipynb
diff --git a/dowhy/causal_estimator.py b/dowhy/causal_estimator.py
@@ -960,6 +960,7 @@ def __init__(self, identified_estimand, estimator_name):
         self.treatment_variable = identified_estimand.treatment_variable
         self.outcome_variable = identified_estimand.outcome_variable
         self.backdoor_variables = identified_estimand.get_backdoor_variables()
+        self.general_adjustment_variables = identified_estimand.get_general_adjustment_variables()
         self.instrumental_variables = identified_estimand.instrumental_variables
         self.estimand_type = identified_estimand.estimand_type
         self.estimand_expression = None

diff --git a/dowhy/causal_estimators/causalml.py b/dowhy/causal_estimators/causalml.py
@@ -118,10 +118,10 @@ def fit(
         self._set_effect_modifiers(data, effect_modifier_names)
 
         # Check the backdoor variables being used
-        self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
+        self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
 
         # Add the observed confounders and one hot encode the categorical variables
-        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
+        self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
         if self._observed_common_causes_names:
             # Get the data of the unobserved confounders
             self._observed_common_causes = data[self._observed_common_causes_names]
@@ -220,6 +220,6 @@ def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ",".join(estimand.outcome_variable) + "~"
         # TODO we are conditioning on a postive treatment
         # TODO create an expression corresponding to each estimator used
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         return expr
diff --git a/dowhy/causal_estimators/distance_matching_estimator.py b/dowhy/causal_estimators/distance_matching_estimator.py
@@ -130,9 +130,9 @@ def fit(self, data: pd.DataFrame, effect_modifier_names: Optional[List[str]] = N
             self.logger.error(error_msg)
             raise Exception(error_msg)
 
-        self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
+        self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
 
-        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
+        self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
         if self._observed_common_causes_names:
             if self.exact_match_cols is not None:
                 self._observed_common_causes_names = [
@@ -307,6 +307,6 @@ def estimate_effect(
 
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ", ".join(estimand.outcome_variable) + "~"
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         return expr
diff --git a/dowhy/causal_estimators/econml.py b/dowhy/causal_estimators/econml.py
@@ -120,7 +120,7 @@ def fit(
         self._econml_fit_params = kwargs
         self._fit_params = kwargs
 
-        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
+        self._observed_common_causes_names = self._target_estimand.get_adjustment_set().copy()
 
         # Enforcing this ordering is necessary to feed through the propensity values from dataset
         self._observed_common_causes_names = [

diff --git a/dowhy/causal_estimators/generalized_linear_model_estimator.py b/dowhy/causal_estimators/generalized_linear_model_estimator.py
@@ -127,7 +127,7 @@ def predict_fn(self, data: pd.DataFrame, model, features):
 
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ",".join(estimand.outcome_variable) + "~" + "Sigmoid("
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         if self._effect_modifier_names:
             interaction_terms = [

diff --git a/dowhy/causal_estimators/linear_regression_estimator.py b/dowhy/causal_estimators/linear_regression_estimator.py
@@ -87,7 +87,7 @@ def fit(
 
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ",".join(estimand.outcome_variable) + "~"
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         if self._effect_modifier_names:
             interaction_terms = [

diff --git a/dowhy/causal_estimators/propensity_score_estimator.py b/dowhy/causal_estimators/propensity_score_estimator.py
@@ -96,8 +96,8 @@ def fit(
         self.reset_encoders()  # Forget any existing encoders
         self._set_effect_modifiers(data, effect_modifier_names)
 
-        self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
-        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
+        self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
+        self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
 
         if self._observed_common_causes_names:
             self._observed_common_causes = data[self._observed_common_causes_names]

diff --git a/dowhy/causal_estimators/propensity_score_matching_estimator.py b/dowhy/causal_estimators/propensity_score_matching_estimator.py
@@ -180,6 +180,6 @@ def estimate_effect(
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ", ".join(estimand.outcome_variable) + "~"
         # TODO -- fix: we are actually conditioning on positive treatment (d=1)
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         return expr
diff --git a/dowhy/causal_estimators/propensity_score_stratification_estimator.py b/dowhy/causal_estimators/propensity_score_stratification_estimator.py
@@ -264,6 +264,6 @@ def _get_strata(self, data: pd.DataFrame, num_strata, clipping_threshold):
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ",".join(estimand.outcome_variable) + "~"
         # TODO -- fix: we are actually conditioning on positive treatment (d=1)
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         return expr
diff --git a/dowhy/causal_estimators/propensity_score_weighting_estimator.py b/dowhy/causal_estimators/propensity_score_weighting_estimator.py
@@ -263,6 +263,6 @@ def estimate_effect(
     def construct_symbolic_estimator(self, estimand):
         expr = "b: " + ",".join(estimand.outcome_variable) + "~"
         # TODO -- fix: we are actually conditioning on positive treatment (d=1)
-        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
+        var_list = estimand.treatment_variable + estimand.get_adjustment_set()
         expr += "+".join(var_list)
         return expr
diff --git a/dowhy/causal_estimators/regression_estimator.py b/dowhy/causal_estimators/regression_estimator.py
@@ -87,8 +87,8 @@ def fit(
         self.reset_encoders()  # Forget any existing encoders
         self._set_effect_modifiers(data, effect_modifier_names)
 
-        self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
-        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
+        self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
+        self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
         if len(self._observed_common_causes_names) > 0:
             self._observed_common_causes = data[self._observed_common_causes_names]
             self._observed_common_causes = self._encode(self._observed_common_causes, "observed_common_causes")

diff --git a/dowhy/causal_identifier/identified_estimand.py b/dowhy/causal_identifier/identified_estimand.py
@@ -85,9 +85,24 @@ def get_instrumental_variables(self):
     def get_general_adjustment_variables(self, key: Optional[str] = None):
         """Return a list containing general adjustment variables."""
         if key is None:
-            return self.general_adjustment_variables[self.default_adjustment_set_id]
+            return self.general_adjustment_variables.get(self.default_adjustment_set_id, None)
         else:
-            return self.general_adjustment_variables[key]
+            return self.general_adjustment_variables.get(key, None)
+
+    def set_general_adjustment_variables(self, variables_arr: List, key: Optional[str] = None):
+        if key is None:
+            key = self.identifier_method
+        self.general_adjustment_variables[key] = variables_arr
+
+    def get_adjustment_set(self, key: Optional[str] = None):
+        if self.identifier_method == "general_adjustment":
+            return self.get_general_adjustment_variables(key)
+        return self.get_backdoor_variables(key)
+
+    def set_adjustment_set(self, variables_arr: List, key: Optional[str] = None):
+        if self.identifier_method == "general_adjustment":
+            return self.set_general_adjustment_variables(variables_arr, key)
+        return self.set_backdoor_variables(variables_arr, key)
 
     def __deepcopy__(self, memo):
         return IdentifiedEstimand(

diff --git a/dowhy/causal_refuter.py b/dowhy/causal_refuter.py
@@ -54,7 +54,7 @@ def __init__(self, data, identified_estimand, estimate, **kwargs):
         # Concatenate the confounders, instruments and effect modifiers
         try:
             self._variables_of_interest = (
-                self._target_estimand.get_backdoor_variables()
+                self._target_estimand.get_adjustment_set()
                 + self._target_estimand.instrumental_variables
                 + self._estimate.estimator._effect_modifier_names
             )

diff --git a/dowhy/causal_refuters/add_unobserved_common_cause.py b/dowhy/causal_refuters/add_unobserved_common_cause.py
@@ -207,9 +207,9 @@ def preprocess_observed_common_causes(
     no_common_causes_error_message: str,
 ):
     """
-    Preprocesses backdoor variables (observed common causes) and returns the pre-processed matrix.
+    Preprocesses adjustment variables (observed common causes) and returns the pre-processed matrix.
 
-    At least one backdoor (common cause) variable is required. Raises an exception if none present.
+    At least one covariate (common cause) variable is required. Raises an exception if none present.
 
     Preprocessing has two steps:
     1. Categorical encoding.
@@ -222,7 +222,7 @@ def preprocess_observed_common_causes(
     """
 
     # 1. Categorical encoding of relevant variables
-    observed_common_causes_names = target_estimand.get_backdoor_variables()
+    observed_common_causes_names = target_estimand.get_adjustment_set()
     if len(observed_common_causes_names) > 0:
         # The encoded data is only used to calculate a parameter, so the encoder can be discarded.
         observed_common_causes = data[observed_common_causes_names]

diff --git a/dowhy/causal_refuters/assess_overlap.py b/dowhy/causal_refuters/assess_overlap.py
@@ -41,6 +41,7 @@ def __init__(self, *args, **kwargs):
         """
         super().__init__(*args, **kwargs)
         # TODO: Check that the target estimand has backdoor variables?
+        # TODO: Is this algorithm compatible with other adjustment criteria, besides backdoor?
         self._backdoor_vars = self._target_estimand.get_backdoor_variables()
         self._cat_feats = kwargs.pop("cat_feats", [])
         self._support_config = kwargs.pop("support_config", None)

diff --git a/dowhy/causal_refuters/bootstrap_refuter.py b/dowhy/causal_refuters/bootstrap_refuter.py
@@ -191,7 +191,7 @@ def refute_bootstrap(
 
     chosen_variables = choose_variables(
         required_variables,
-        target_estimand.get_backdoor_variables()
+        target_estimand.get_adjustment_set()
         + target_estimand.instrumental_variables
         + estimate.estimator._effect_modifier_names,
     )

diff --git a/dowhy/causal_refuters/dummy_outcome_refuter.py b/dowhy/causal_refuters/dummy_outcome_refuter.py
@@ -55,7 +55,7 @@ class DummyOutcomeRefuter(CausalRefuter):
     then we can add an arbitrary function h(t) to the dummy outcome's
     generation process and then the causal effect becomes h(t=1)-h(t=0).
 
-    Note that this general procedure only works for the backdoor criterion.
+    Note that this general procedure only works for covariate adjustment.
 
     1. We find f(W) for a each value of treatment. That is, keeping the treatment
     constant, we fit a predictor to estimate the effect of confounders W on
@@ -108,7 +108,7 @@ class DummyOutcomeRefuter(CausalRefuter):
         * function argument: function ``pd.Dataframe -> np.ndarray``
 
         It takes in a function that takes the input data frame as the input and outputs the outcome
-        variable. This allows us to create an output varable that only depends on the covariates and does not depend
+        variable. This allows us to create an output variable that only depends on the covariates and does not depend
         on the treatment variable.
 
         * string argument
@@ -271,7 +271,7 @@ def refute_dummy_outcome(
     then we can add an arbitrary function h(t) to the dummy outcome's
     generation process and then the causal effect becomes h(t=1)-h(t=0).
 
-    Note that this general procedure only works for the backdoor criterion.
+    Note that this general procedure only works for covariate adjustment.
 
     1. We find f(W) for a each value of treatment. That is, keeping the treatment
     constant, we fit a predictor to estimate the effect of confounders W on
@@ -438,7 +438,7 @@ def refute_dummy_outcome(
     estimator_present = _has_estimator(transformation_list)
     chosen_variables = choose_variables(
         required_variables,
-        target_estimand.get_backdoor_variables()
+        target_estimand.get_adjustment_set()
         + target_estimand.instrumental_variables
         + estimate.estimator._effect_modifier_names,
     )

diff --git a/dowhy/causal_refuters/evalue_sensitivity_analyzer.py b/dowhy/causal_refuters/evalue_sensitivity_analyzer.py
@@ -251,13 +251,13 @@ def benchmark(self, data: pd.DataFrame):
         new_lo = []
         new_hi = []
         observed_covariate_e_values = []
-        backdoor_vars = self.estimand.get_backdoor_variables()
-        for drop_var in backdoor_vars:
+        covariates = self.estimand.get_adjustment_set()
+        for drop_var in covariates:
 
             # new estimator
-            new_backdoor_vars = [var for var in backdoor_vars if var != drop_var]
+            new_covariate_vars = [var for var in covariates if var != drop_var]
             new_estimand = copy.deepcopy(self.estimand)
-            new_estimand.set_backdoor_variables(new_backdoor_vars)
+            new_estimand.set_adjustment_set(new_covariate_vars)
             new_estimator = self.estimate.estimator.get_new_estimator_object(new_estimand)
             new_estimator.fit(
                 self.data,
@@ -296,7 +296,7 @@ def benchmark(self, data: pd.DataFrame):
 
         self.benchmarking_results = pd.DataFrame(
             {
-                "dropped_covariate": backdoor_vars,
+                "dropped_covariate": covariates,
                 "converted_est": new_ests,
                 "converted_lower_ci": new_lo,
                 "converted_upper_ci": new_hi,

diff --git a/dowhy/causal_refuters/random_common_cause.py b/dowhy/causal_refuters/random_common_cause.py
@@ -107,10 +107,10 @@ def refute_random_common_cause(
     """
     logger.info("Refutation over {} simulated datasets, each with a random common cause added".format(num_simulations))
 
-    new_backdoor_variables = target_estimand.get_backdoor_variables() + ["w_random"]
+    new_adjustment_variables = target_estimand.get_adjustment_set() + ["w_random"]
     identified_estimand = copy.deepcopy(target_estimand)
     # Adding a new backdoor variable to the identified estimand
-    identified_estimand.set_backdoor_variables(new_backdoor_variables)
+    identified_estimand.set_adjustment_set(new_adjustment_variables)
 
     if isinstance(random_state, int):
         random_state = np.random.RandomState(seed=random_state)