Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support PyMC 5.13 and fix bayeux related issues #803

Merged
merged 4 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

* Fix bug in predictions with models using HSGP (#780)
* Fix `get_model_covariates()` utility function (#801)
* Upgrade PyMC dependency to >= 5.13 (#803)
* Use `pm.compute_deterministics()` to compute deterministics when bayeux-based samplers are used (#803)

### Documentation

Expand Down
40 changes: 25 additions & 15 deletions bambi/backend/pymc.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,45 +253,55 @@ def _run_mcmc(
return idata

def _clean_results(self, idata, omit_offsets, include_mean, idata_from):
for group in idata.groups():
# Before doing anything, make sure we compute deterministics.
if idata_from == "bayeux":
idata.posterior = pm.compute_deterministics(
idata.posterior, model=self.model, merge_dataset=True, progressbar=False
)

for group in idata.groups():
getattr(idata, group).attrs["modeling_interface"] = "bambi"
getattr(idata, group).attrs["modeling_interface_version"] = __version__

if omit_offsets:
offset_vars = [var for var in idata.posterior.data_vars if var.endswith("_offset")]
idata.posterior = idata.posterior.drop_vars(offset_vars)

# Drop variables and dimensions associated with LKJ prior
vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
# NOTE:
# This has not had an effect for a while since we haven't been supporting LKJ prior lately.

idata.posterior = idata.posterior.drop_vars(vars_to_drop)
idata.posterior = idata.posterior.drop_dims(dims_to_drop)
# Drop variables and dimensions associated with LKJ prior
# vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
# dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
# idata.posterior = idata.posterior.drop_vars(vars_to_drop)
# idata.posterior = idata.posterior.drop_dims(dims_to_drop)

dims_original = list(self.model.coords)

# Identify bayeux idata and rename dims and coordinates to match PyMC model
if idata_from == "bayeux":
pymc_model_dims = [dim for dim in dims_original if "_obs" not in dim]
bayeux_dims = [
dim for dim in idata.posterior.dims if not dim.startswith(("chain", "draw"))
]
cleaned_dims = dict(zip(bayeux_dims, pymc_model_dims))
cleaned_dims = {
f"{dim}_0": dim
for dim in dims_original
if not dim.endswith("_obs") and f"{dim}_0" in idata.posterior.dims
}
idata = idata.rename(cleaned_dims)

# Discard dims that are in the model but unused in the posterior
# Don't select dims that are in the model but unused in the posterior
dims_original = [dim for dim in dims_original if dim in idata.posterior.dims]

# This does not add any new coordinate, it just changes the order so the ones
# ending in "__factor_dim" are placed after the others.
dims_group = [c for c in dims_original if c.endswith("__factor_dim")]
dims_group = [dim for dim in dims_original if dim.endswith("__factor_dim")]

# Keep the original order in dims_original
dims_original_set = set(dims_original) - set(dims_group)
dims_original = [c for c in dims_original if c in dims_original_set]
dims_original = [dim for dim in dims_original if dim in dims_original_set]
dims_new = ["chain", "draw"] + dims_original + dims_group
idata.posterior = idata.posterior.transpose(*dims_new)

# Drop unused dimensions before transposing
dims_to_drop = [dim for dim in idata.posterior.dims if dim not in dims_new]
idata.posterior = idata.posterior.drop_dims(dims_to_drop).transpose(*dims_new)

# Compute the actual intercept in all distributional components that have an intercept
for pymc_component in self.distributional_components.values():
Expand Down
6 changes: 5 additions & 1 deletion bambi/backend/terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,13 @@ def build(self, spec):
response_dims = list(spec.response_component.response_term.coords)

dims = list(self.coords) + response_dims
coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs)

# Squeeze ensures we don't have a shape of (n, 1) when we mean (n, )
# This happens with categorical predictors with two levels and intercept.
coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs).squeeze()
# See https://github.com/pymc-devs/pymc/issues/7246
if len(coef.shape.eval()) == 2 and coef.shape.eval()[-1] == 1:
coef = pt.specify_broadcastable(coef, 1).squeeze()
coef = coef[self.term.group_index]

return coef, predictor
Expand Down
2 changes: 1 addition & 1 deletion bambi/families/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def transform_backend_eta(eta, kwargs):
def transform_backend_kwargs(kwargs):
# P(Y = k) = F(threshold_k - eta) * \prod_{j=1}^{k-1}{1 - F(threshold_j - eta)}
p = kwargs.pop("p")
n_columns = p.type.shape[-1]
n_columns = p.shape.eval()[-1]
p = pt.concatenate(
[
pt.shape_padright(p[..., 0]),
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"formulae>=0.5.3",
"graphviz",
"pandas>=1.0.0",
"pymc>=5.12.0,<5.13.0",
"pymc>=5.13.0",
]

[project.optional-dependencies]
Expand Down