Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Support Generalized Adjustment Criterion for Estimation plus Add Example Notebook #1297

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dowhy/causal_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,7 @@ def __init__(self, identified_estimand, estimator_name):
self.treatment_variable = identified_estimand.treatment_variable
self.outcome_variable = identified_estimand.outcome_variable
self.backdoor_variables = identified_estimand.get_backdoor_variables()
self.general_adjustment_variables = identified_estimand.get_general_adjustment_variables()
self.instrumental_variables = identified_estimand.instrumental_variables
self.estimand_type = identified_estimand.estimand_type
self.estimand_expression = None
Expand Down
6 changes: 3 additions & 3 deletions dowhy/causal_estimators/causalml.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ def fit(
self._set_effect_modifiers(data, effect_modifier_names)

# Check the backdoor variables being used
self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))

# Add the observed confounders and one hot encode the categorical variables
self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
if self._observed_common_causes_names:
# Get the data of the observed confounders
self._observed_common_causes = data[self._observed_common_causes_names]
Expand Down Expand Up @@ -220,6 +220,6 @@ def construct_symbolic_estimator(self, estimand):
expr = "b: " + ",".join(estimand.outcome_variable) + "~"
# TODO we are conditioning on a positive treatment
# TODO create an expression corresponding to each estimator used
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
return expr
6 changes: 3 additions & 3 deletions dowhy/causal_estimators/distance_matching_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ def fit(self, data: pd.DataFrame, effect_modifier_names: Optional[List[str]] = N
self.logger.error(error_msg)
raise Exception(error_msg)

self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))

self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
if self._observed_common_causes_names:
if self.exact_match_cols is not None:
self._observed_common_causes_names = [
Expand Down Expand Up @@ -307,6 +307,6 @@ def estimate_effect(

def construct_symbolic_estimator(self, estimand):
expr = "b: " + ", ".join(estimand.outcome_variable) + "~"
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
return expr
2 changes: 1 addition & 1 deletion dowhy/causal_estimators/econml.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def fit(
self._econml_fit_params = kwargs
self._fit_params = kwargs

self._observed_common_causes_names = self._target_estimand.get_backdoor_variables().copy()
self._observed_common_causes_names = self._target_estimand.get_adjustment_set().copy()

# Enforcing this ordering is necessary to feed through the propensity values from dataset
self._observed_common_causes_names = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def predict_fn(self, data: pd.DataFrame, model, features):

def construct_symbolic_estimator(self, estimand):
expr = "b: " + ",".join(estimand.outcome_variable) + "~" + "Sigmoid("
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
if self._effect_modifier_names:
interaction_terms = [
Expand Down
2 changes: 1 addition & 1 deletion dowhy/causal_estimators/linear_regression_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def fit(

def construct_symbolic_estimator(self, estimand):
expr = "b: " + ",".join(estimand.outcome_variable) + "~"
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
if self._effect_modifier_names:
interaction_terms = [
Expand Down
4 changes: 2 additions & 2 deletions dowhy/causal_estimators/propensity_score_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ def fit(
self.reset_encoders() # Forget any existing encoders
self._set_effect_modifiers(data, effect_modifier_names)

self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
self._observed_common_causes_names = self._target_estimand.get_adjustment_set()

if self._observed_common_causes_names:
self._observed_common_causes = data[self._observed_common_causes_names]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,6 @@ def estimate_effect(
def construct_symbolic_estimator(self, estimand):
expr = "b: " + ", ".join(estimand.outcome_variable) + "~"
# TODO -- fix: we are actually conditioning on positive treatment (d=1)
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
return expr
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,6 @@ def _get_strata(self, data: pd.DataFrame, num_strata, clipping_threshold):
def construct_symbolic_estimator(self, estimand):
expr = "b: " + ",".join(estimand.outcome_variable) + "~"
# TODO -- fix: we are actually conditioning on positive treatment (d=1)
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
return expr
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,6 @@ def estimate_effect(
def construct_symbolic_estimator(self, estimand):
expr = "b: " + ",".join(estimand.outcome_variable) + "~"
# TODO -- fix: we are actually conditioning on positive treatment (d=1)
var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
var_list = estimand.treatment_variable + estimand.get_adjustment_set()
expr += "+".join(var_list)
return expr
4 changes: 2 additions & 2 deletions dowhy/causal_estimators/regression_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ def fit(
self.reset_encoders() # Forget any existing encoders
self._set_effect_modifiers(data, effect_modifier_names)

self.logger.debug("Back-door variables used:" + ",".join(self._target_estimand.get_backdoor_variables()))
self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
self.logger.debug("Adjustment set variables used:" + ",".join(self._target_estimand.get_adjustment_set()))
self._observed_common_causes_names = self._target_estimand.get_adjustment_set()
if len(self._observed_common_causes_names) > 0:
self._observed_common_causes = data[self._observed_common_causes_names]
self._observed_common_causes = self._encode(self._observed_common_causes, "observed_common_causes")
Expand Down
19 changes: 17 additions & 2 deletions dowhy/causal_identifier/identified_estimand.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,24 @@ def get_instrumental_variables(self):
def get_general_adjustment_variables(self, key: Optional[str] = None):
"""Return a list containing general adjustment variables."""
if key is None:
return self.general_adjustment_variables[self.default_adjustment_set_id]
return self.general_adjustment_variables.get(self.default_adjustment_set_id, None)
else:
return self.general_adjustment_variables[key]
return self.general_adjustment_variables.get(key, None)

def set_general_adjustment_variables(self, variables_arr: List, key: Optional[str] = None):
if key is None:
key = self.identifier_method
self.general_adjustment_variables[key] = variables_arr

def get_adjustment_set(self, key: Optional[str] = None):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the core change. IdentifiedEstimand now has a generic get_adjustment_set method which routes to one of get_backdoor_variables or get_general_adjustment_variables.

The wrapper method get_adjustment_set should thus be called instead of get_backdoor_variables or get_general_adjustment_variables throughout the codebase.

if self.identifier_method == "general_adjustment":
return self.get_general_adjustment_variables(key)
return self.get_backdoor_variables(key)

def set_adjustment_set(self, variables_arr: List, key: Optional[str] = None):
if self.identifier_method == "general_adjustment":
return self.set_general_adjustment_variables(variables_arr, key)
return self.set_backdoor_variables(variables_arr, key)

def __deepcopy__(self, memo):
return IdentifiedEstimand(
Expand Down
2 changes: 1 addition & 1 deletion dowhy/causal_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, data, identified_estimand, estimate, **kwargs):
# Concatenate the confounders, instruments and effect modifiers
try:
self._variables_of_interest = (
self._target_estimand.get_backdoor_variables()
self._target_estimand.get_adjustment_set()
+ self._target_estimand.instrumental_variables
+ self._estimate.estimator._effect_modifier_names
)
Expand Down
6 changes: 3 additions & 3 deletions dowhy/causal_refuters/add_unobserved_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@ def preprocess_observed_common_causes(
no_common_causes_error_message: str,
):
"""
Preprocesses backdoor variables (observed common causes) and returns the pre-processed matrix.
Preprocesses adjustment variables (observed common causes) and returns the pre-processed matrix.

At least one backdoor (common cause) variable is required. Raises an exception if none present.
At least one covariate (common cause) variable is required. Raises an exception if none present.

Preprocessing has two steps:
1. Categorical encoding.
Expand All @@ -222,7 +222,7 @@ def preprocess_observed_common_causes(
"""

# 1. Categorical encoding of relevant variables
observed_common_causes_names = target_estimand.get_backdoor_variables()
observed_common_causes_names = target_estimand.get_adjustment_set()
if len(observed_common_causes_names) > 0:
# The encoded data is only used to calculate a parameter, so the encoder can be discarded.
observed_common_causes = data[observed_common_causes_names]
Expand Down
1 change: 1 addition & 0 deletions dowhy/causal_refuters/assess_overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(self, *args, **kwargs):
"""
super().__init__(*args, **kwargs)
# TODO: Check that the target estimand has backdoor variables?
# TODO: Is this algorithm compatible with other adjustment criteria, besides backdoor?
self._backdoor_vars = self._target_estimand.get_backdoor_variables()
self._cat_feats = kwargs.pop("cat_feats", [])
self._support_config = kwargs.pop("support_config", None)
Expand Down
2 changes: 1 addition & 1 deletion dowhy/causal_refuters/bootstrap_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def refute_bootstrap(

chosen_variables = choose_variables(
required_variables,
target_estimand.get_backdoor_variables()
target_estimand.get_adjustment_set()
+ target_estimand.instrumental_variables
+ estimate.estimator._effect_modifier_names,
)
Expand Down
8 changes: 4 additions & 4 deletions dowhy/causal_refuters/dummy_outcome_refuter.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class DummyOutcomeRefuter(CausalRefuter):
then we can add an arbitrary function h(t) to the dummy outcome's
generation process and then the causal effect becomes h(t=1)-h(t=0).

Note that this general procedure only works for the backdoor criterion.
Note that this general procedure only works for covariate adjustment.

1. We find f(W) for each value of treatment. That is, keeping the treatment
constant, we fit a predictor to estimate the effect of confounders W on
Expand Down Expand Up @@ -108,7 +108,7 @@ class DummyOutcomeRefuter(CausalRefuter):
* function argument: function ``pd.Dataframe -> np.ndarray``

It takes in a function that takes the input data frame as the input and outputs the outcome
variable. This allows us to create an output varable that only depends on the covariates and does not depend
variable. This allows us to create an output variable that only depends on the covariates and does not depend
on the treatment variable.

* string argument
Expand Down Expand Up @@ -271,7 +271,7 @@ def refute_dummy_outcome(
then we can add an arbitrary function h(t) to the dummy outcome's
generation process and then the causal effect becomes h(t=1)-h(t=0).

Note that this general procedure only works for the backdoor criterion.
Note that this general procedure only works for covariate adjustment.

1. We find f(W) for each value of treatment. That is, keeping the treatment
constant, we fit a predictor to estimate the effect of confounders W on
Expand Down Expand Up @@ -438,7 +438,7 @@ def refute_dummy_outcome(
estimator_present = _has_estimator(transformation_list)
chosen_variables = choose_variables(
required_variables,
target_estimand.get_backdoor_variables()
target_estimand.get_adjustment_set()
+ target_estimand.instrumental_variables
+ estimate.estimator._effect_modifier_names,
)
Expand Down
10 changes: 5 additions & 5 deletions dowhy/causal_refuters/evalue_sensitivity_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,13 +251,13 @@ def benchmark(self, data: pd.DataFrame):
new_lo = []
new_hi = []
observed_covariate_e_values = []
backdoor_vars = self.estimand.get_backdoor_variables()
for drop_var in backdoor_vars:
covariates = self.estimand.get_adjustment_set()
for drop_var in covariates:

# new estimator
new_backdoor_vars = [var for var in backdoor_vars if var != drop_var]
new_covariate_vars = [var for var in covariates if var != drop_var]
new_estimand = copy.deepcopy(self.estimand)
new_estimand.set_backdoor_variables(new_backdoor_vars)
new_estimand.set_adjustment_set(new_covariate_vars)
new_estimator = self.estimate.estimator.get_new_estimator_object(new_estimand)
new_estimator.fit(
self.data,
Expand Down Expand Up @@ -296,7 +296,7 @@ def benchmark(self, data: pd.DataFrame):

self.benchmarking_results = pd.DataFrame(
{
"dropped_covariate": backdoor_vars,
"dropped_covariate": covariates,
"converted_est": new_ests,
"converted_lower_ci": new_lo,
"converted_upper_ci": new_hi,
Expand Down
4 changes: 2 additions & 2 deletions dowhy/causal_refuters/random_common_cause.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ def refute_random_common_cause(
"""
logger.info("Refutation over {} simulated datasets, each with a random common cause added".format(num_simulations))

new_backdoor_variables = target_estimand.get_backdoor_variables() + ["w_random"]
new_adjustment_variables = target_estimand.get_adjustment_set() + ["w_random"]
identified_estimand = copy.deepcopy(target_estimand)
# Adding a new backdoor variable to the identified estimand
identified_estimand.set_backdoor_variables(new_backdoor_variables)
identified_estimand.set_adjustment_set(new_adjustment_variables)

if isinstance(random_state, int):
random_state = np.random.RandomState(seed=random_state)
Expand Down
Loading
Loading