Skip to content

API change for the SyntheticControl experiment class #460

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7bbff4f
initial efforts
drbenvincent Apr 21, 2025
7ece785
remove print statement
drbenvincent Apr 21, 2025
98127fd
obs_indx -> obs_ind (see #459)
drbenvincent Apr 21, 2025
82d041d
update API in tests for SyntheticControl class
drbenvincent Apr 21, 2025
3bbabee
Merge branch 'main' into sc-api-change
drbenvincent Apr 21, 2025
1f4d17e
tidy up + fixes
drbenvincent Apr 21, 2025
182aac0
get deprecation tests working again
drbenvincent Apr 21, 2025
876c154
bug fixes
drbenvincent Apr 22, 2025
3d29fef
fix bug with SyntheticControl.get_plot_data_bayesian
drbenvincent Apr 22, 2025
3eb24a9
use new API in scikit-learn integration test
drbenvincent Apr 22, 2025
bd9beaa
update the pymc synthetic control notebooks
drbenvincent Apr 22, 2025
ed62f0a
remove test_api_stability
drbenvincent Apr 22, 2025
a148ec3
fix bugs
drbenvincent Apr 22, 2025
15454b2
bug fixing
drbenvincent Apr 22, 2025
b77c3f0
remove api backward compatibility and deprecation tests
drbenvincent Apr 22, 2025
c89a147
more deprecation removal
drbenvincent Apr 22, 2025
fad78d8
add additional asserts to integration tests to detect shape problems
drbenvincent Apr 22, 2025
2137091
remove asserts which weren't doing the job I intended
drbenvincent Apr 22, 2025
45f1b1a
start embracing xarray to handle broadcasting
drbenvincent Apr 22, 2025
2726484
formatting
drbenvincent Apr 22, 2025
b920207
store data in xarray objects in more experiments
drbenvincent Apr 22, 2025
a28c5da
attempt to make LinearRegression doctest pass
drbenvincent Apr 22, 2025
642f651
revert a change which seems no longer required
drbenvincent Apr 22, 2025
a17f5c9
fix some failing tests
drbenvincent Apr 23, 2025
9941c02
all tests now passing 🎉 (one failing doctest)
drbenvincent Apr 23, 2025
6db026e
update api calls in multi cell geolift notebook
drbenvincent Apr 23, 2025
b49ed7e
undo plot colour change that I made when debugging
drbenvincent Apr 23, 2025
1f753e9
final doctest now passes 😍
drbenvincent Apr 23, 2025
d855557
update notebook on scikit-learn synthetic control example
drbenvincent May 7, 2025
c92c117
rerun synthetic control notebook
drbenvincent May 7, 2025
2fcb3ba
fix minor issue in multi-cell geolift notebook
drbenvincent May 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions causalpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
# limitations under the License.
import arviz as az

import causalpy.pymc_experiments as pymc_experiments # to be deprecated
import causalpy.pymc_models as pymc_models
import causalpy.skl_experiments as skl_experiments # to be deprecated
import causalpy.skl_models as skl_models
from causalpy.skl_models import create_causalpy_compatible_class
from causalpy.version import __version__
Expand All @@ -41,11 +39,9 @@
"InversePropensityWeighting",
"load_data",
"PrePostNEGD",
"pymc_experiments", # to be deprecated
"pymc_models",
"RegressionDiscontinuity",
"RegressionKink",
"skl_experiments", # to be deprecated
"skl_models",
"SyntheticControl",
]
25 changes: 22 additions & 3 deletions causalpy/experiments/diff_in_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from matplotlib import pyplot as plt
from patsy import build_design_matrices, dmatrices
from sklearn.base import RegressorMixin
Expand Down Expand Up @@ -87,7 +88,8 @@ def __init__(
**kwargs,
) -> None:
super().__init__(model=model)

# rename the index to "obs_ind"
data.index.name = "obs_ind"
self.data = data
self.expt_type = "Difference in Differences"
self.formula = formula
Expand All @@ -102,6 +104,21 @@ def __init__(
self.y, self.X = np.asarray(y), np.asarray(X)
self.outcome_variable_name = y.design_info.column_names[0]

# turn into xarray.DataArray's
self.X = xr.DataArray(
self.X,
dims=["obs_ind", "coeffs"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

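Why is the second dim of X named `coeffs`? Surely it should be `covariates`: X holds the predictors, whereas the coefficients are the model's output. The two do map 1:1, but naming the design-matrix dimension `coeffs` is a little misleading, no?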

coords={
"obs_ind": np.arange(self.X.shape[0]),
"coeffs": self.labels,
},
)
self.y = xr.DataArray(
self.y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": np.arange(self.y.shape[0])},
)

# fit model
if isinstance(self.model, PyMCModel):
COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])}
Expand Down Expand Up @@ -183,13 +200,15 @@ def __init__(
)
elif isinstance(self.model, RegressorMixin):
# This is the coefficient on the interaction term
# TODO: THIS IS NOT YET CORRECT ?????
# TODO: CHECK FOR CORRECTNESS
self.causal_impact = (
self.y_pred_treatment[1] - self.y_pred_counterfactual[0]
)[0]
)
else:
raise ValueError("Model type not recognized")

return

def input_validation(self):
"""Validate the input data and model formula for correctness"""
if "post_treatment" not in self.formula:
Expand Down
36 changes: 32 additions & 4 deletions causalpy/experiments/interrupted_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import arviz as az
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt
from patsy import build_design_matrices, dmatrices
from sklearn.base import RegressorMixin
Expand Down Expand Up @@ -84,6 +85,8 @@ def __init__(
**kwargs,
) -> None:
super().__init__(model=model)
# rename the index to "obs_ind"
data.index.name = "obs_ind"
self.input_validation(data, treatment_time)
self.treatment_time = treatment_time
# set experiment type - usually done in subclasses
Expand All @@ -107,6 +110,33 @@ def __init__(
)
self.post_X = np.asarray(new_x)
self.post_y = np.asarray(new_y)
# turn into xarray.DataArray's
self.pre_X = xr.DataArray(
self.pre_X,
dims=["obs_ind", "coeffs"],
coords={
"obs_ind": self.datapre.index,
"coeffs": self.labels,
},
)
self.pre_y = xr.DataArray(
self.pre_y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": self.datapre.index},
)
self.post_X = xr.DataArray(
self.post_X,
dims=["obs_ind", "coeffs"],
coords={
"obs_ind": self.datapost.index,
"coeffs": self.labels,
},
)
self.post_y = xr.DataArray(
self.post_y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": self.datapost.index},
)

# fit the model to the observed (pre-intervention) data
if isinstance(self.model, PyMCModel):
Expand All @@ -125,10 +155,8 @@ def __init__(

# calculate the counterfactual
self.post_pred = self.model.predict(X=self.post_X)
self.pre_impact = self.model.calculate_impact(self.pre_y[:, 0], self.pre_pred)
self.post_impact = self.model.calculate_impact(
self.post_y[:, 0], self.post_pred
)
self.pre_impact = self.model.calculate_impact(self.pre_y, self.pre_pred)
self.post_impact = self.model.calculate_impact(self.post_y, self.post_pred)
self.post_impact_cumulative = self.model.calculate_cumulative_impact(
self.post_impact
)
Expand Down
16 changes: 16 additions & 0 deletions causalpy/experiments/prepostnegd.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from matplotlib import pyplot as plt
from patsy import build_design_matrices, dmatrices
from sklearn.base import RegressorMixin
Expand Down Expand Up @@ -111,6 +112,21 @@ def __init__(
self.y, self.X = np.asarray(y), np.asarray(X)
self.outcome_variable_name = y.design_info.column_names[0]

# turn into xarray.DataArray's
self.X = xr.DataArray(
self.X,
dims=["obs_ind", "coeffs"],
coords={
"obs_ind": np.arange(self.X.shape[0]),
"coeffs": self.labels,
},
)
self.y = xr.DataArray(
self.y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": self.data.index},
)

# fit the model to the observed (pre-intervention) data
if isinstance(self.model, PyMCModel):
COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])}
Expand Down
17 changes: 16 additions & 1 deletion causalpy/experiments/regression_discontinuity.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from matplotlib import pyplot as plt
from patsy import build_design_matrices, dmatrices
from sklearn.base import RegressorMixin

import xarray as xr
from causalpy.custom_exceptions import (
DataException,
FormulaException,
Expand Down Expand Up @@ -121,6 +121,21 @@ def __init__(
self.y, self.X = np.asarray(y), np.asarray(X)
self.outcome_variable_name = y.design_info.column_names[0]

# turn into xarray.DataArray's
self.X = xr.DataArray(
self.X,
dims=["obs_ind", "coeffs"],
coords={
"obs_ind": np.arange(self.X.shape[0]),
"coeffs": self.labels,
},
)
self.y = xr.DataArray(
self.y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": np.arange(self.y.shape[0])},
)

# fit model
if isinstance(self.model, PyMCModel):
# fit the model to the observed (pre-intervention) data
Expand Down
17 changes: 16 additions & 1 deletion causalpy/experiments/regression_kink.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import pandas as pd
import seaborn as sns
from patsy import build_design_matrices, dmatrices

import xarray as xr
from causalpy.plot_utils import plot_xY

from .base import BaseExperiment
Expand Down Expand Up @@ -84,6 +84,21 @@ def __init__(
self.y, self.X = np.asarray(y), np.asarray(X)
self.outcome_variable_name = y.design_info.column_names[0]

# turn into xarray.DataArray's
self.X = xr.DataArray(
self.X,
dims=["obs_ind", "coeffs"],
coords={
"obs_ind": np.arange(self.X.shape[0]),
"coeffs": self.labels,
},
)
self.y = xr.DataArray(
self.y[:, 0],
dims=["obs_ind"],
coords={"obs_ind": np.arange(self.y.shape[0])},
)

COORDS = {"coeffs": self.labels, "obs_ind": np.arange(self.X.shape[0])}
self.model.fit(X=self.X, y=self.y, coords=COORDS)

Expand Down
Loading