Support PyMC 5.13 and fix bayeux related issues (bambinos#803)
tomicapretto authored and GStechschulte committed Apr 14, 2024
1 parent dd43450 commit 5610dbb
Showing 5 changed files with 34 additions and 18 deletions.
CHANGELOG.md (2 changes: 2 additions & 0 deletions)
@@ -14,6 +14,8 @@
 
 * Fix bug in predictions with models using HSGP (#780)
 * Fix `get_model_covariates()` utility function (#801)
+* Upgrade PyMC dependency to >= 5.13 (#803)
+* Use `pm.compute_deterministics()` to compute deterministics when bayeux based samplers are used (#803)
 
 ### Documentation
bambi/backend/pymc.py (40 changes: 25 additions & 15 deletions)
@@ -253,45 +253,55 @@ def _run_mcmc(
         return idata
 
     def _clean_results(self, idata, omit_offsets, include_mean, idata_from):
-        for group in idata.groups():
+        # Before doing anything, make sure we compute deterministics.
+        if idata_from == "bayeux":
+            idata.posterior = pm.compute_deterministics(
+                idata.posterior, model=self.model, merge_dataset=True, progressbar=False
+            )
+
+        for group in idata.groups():
             getattr(idata, group).attrs["modeling_interface"] = "bambi"
             getattr(idata, group).attrs["modeling_interface_version"] = __version__
 
         if omit_offsets:
             offset_vars = [var for var in idata.posterior.data_vars if var.endswith("_offset")]
             idata.posterior = idata.posterior.drop_vars(offset_vars)
 
-        # Drop variables and dimensions associated with LKJ prior
-        vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
-        dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
+        # NOTE:
+        # This has not had an effect for a while since we haven't been supporting LKJ prior lately.
 
-        idata.posterior = idata.posterior.drop_vars(vars_to_drop)
-        idata.posterior = idata.posterior.drop_dims(dims_to_drop)
+        # Drop variables and dimensions associated with LKJ prior
+        # vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
+        # dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
+        # idata.posterior = idata.posterior.drop_vars(vars_to_drop)
+        # idata.posterior = idata.posterior.drop_dims(dims_to_drop)
 
         dims_original = list(self.model.coords)
 
         # Identify bayeux idata and rename dims and coordinates to match PyMC model
         if idata_from == "bayeux":
-            pymc_model_dims = [dim for dim in dims_original if "_obs" not in dim]
-            bayeux_dims = [
-                dim for dim in idata.posterior.dims if not dim.startswith(("chain", "draw"))
-            ]
-            cleaned_dims = dict(zip(bayeux_dims, pymc_model_dims))
+            cleaned_dims = {
+                f"{dim}_0": dim
+                for dim in dims_original
+                if not dim.endswith("_obs") and f"{dim}_0" in idata.posterior.dims
+            }
             idata = idata.rename(cleaned_dims)
 
-        # Discard dims that are in the model but unused in the posterior
+        # Don't select dims that are in the model but unused in the posterior
         dims_original = [dim for dim in dims_original if dim in idata.posterior.dims]
 
         # This does not add any new coordinate, it just changes the order so the ones
         # ending in "__factor_dim" are placed after the others.
-        dims_group = [c for c in dims_original if c.endswith("__factor_dim")]
+        dims_group = [dim for dim in dims_original if dim.endswith("__factor_dim")]
 
         # Keep the original order in dims_original
         dims_original_set = set(dims_original) - set(dims_group)
-        dims_original = [c for c in dims_original if c in dims_original_set]
+        dims_original = [dim for dim in dims_original if dim in dims_original_set]
         dims_new = ["chain", "draw"] + dims_original + dims_group
-        idata.posterior = idata.posterior.transpose(*dims_new)
+
+        # Drop unused dimensions before transposing
+        dims_to_drop = [dim for dim in idata.posterior.dims if dim not in dims_new]
+        idata.posterior = idata.posterior.drop_dims(dims_to_drop).transpose(*dims_new)
 
         # Compute the actual intercept in all distributional components that have an intercept
         for pymc_component in self.distributional_components.values():
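Note: a minimal, self-contained sketch of the two bayeux-specific steps above. The model, the "beta" variable, and the "predictor" coordinate are invented for illustration, and the posterior is hand-built to mimic what a bayeux-based sampler returns (draws of the free variables only, with dims named "{dim}_0"); this is not Bambi's own code.

import numpy as np
import pymc as pm
import xarray as xr

# Toy stand-in for a Bambi-built model.
with pm.Model(coords={"predictor": ["x1", "x2"]}) as model:
    beta = pm.Normal("beta", 0, 1, dims="predictor")
    pm.Deterministic("mu", beta.sum())

# A bayeux-style posterior: free variables only, dims suffixed with "_0".
posterior = xr.Dataset(
    {"beta": (("chain", "draw", "predictor_0"), np.random.randn(2, 10, 2))}
)

# Step 1: recompute deterministics such as "mu" from the free-variable draws
# and merge them into the posterior (pm.compute_deterministics ships with
# PyMC 5.13, hence the version bump in this commit).
posterior = pm.compute_deterministics(
    posterior, model=model, merge_dataset=True, progressbar=False
)

# Step 2: rename bayeux's "{dim}_0" dims back to the PyMC model's dim names.
cleaned_dims = {
    f"{dim}_0": dim
    for dim in model.coords
    if not dim.endswith("_obs") and f"{dim}_0" in posterior.dims
}
posterior = posterior.rename(cleaned_dims)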
bambi/backend/terms.py (6 changes: 5 additions & 1 deletion)
@@ -110,9 +110,13 @@ def build(self, spec):
             response_dims = list(spec.response_component.response_term.coords)
 
         dims = list(self.coords) + response_dims
+        coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs)
+
         # Squeeze ensures we don't have a shape of (n, 1) when we mean (n, )
         # This happens with categorical predictors with two levels and intercept.
-        coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs).squeeze()
+        # See https://github.com/pymc-devs/pymc/issues/7246
+        if len(coef.shape.eval()) == 2 and coef.shape.eval()[-1] == 1:
+            coef = pt.specify_broadcastable(coef, 1).squeeze()
         coef = coef[self.term.group_index]
 
         return coef, predictor
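For context on the squeeze workaround: when a length-1 trailing axis is only knowable at runtime, PyTensor's squeeze() cannot drop it on its own, which is the failure mode in pymc-devs/pymc#7246. A small sketch under stated assumptions, using a shared variable to stand in for a coords-driven shape (the names are made up):

import pytensor
import pytensor.tensor as pt

# The column count comes from a shared variable, so the static type of the
# last axis is unknown (None) even though it is 1 at runtime.
n_cols = pytensor.shared(1)
coef = pt.zeros((3, n_cols))

print(coef.type.shape)      # (3, None)
print(coef.squeeze().ndim)  # 2: squeeze() keeps axes it can't prove have length 1

# The workaround used in the diff: check the runtime shape, assert that the
# trailing axis is broadcastable, then squeeze it away.
if len(coef.shape.eval()) == 2 and coef.shape.eval()[-1] == 1:
    coef = pt.specify_broadcastable(coef, 1).squeeze()

print(coef.ndim)  # 1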
bambi/families/univariate.py (2 changes: 1 addition & 1 deletion)
@@ -403,7 +403,7 @@ def transform_backend_eta(eta, kwargs):
     def transform_backend_kwargs(kwargs):
         # P(Y = k) = F(threshold_k - eta) * \prod_{j=1}^{k-1}{1 - F(threshold_j - eta)}
         p = kwargs.pop("p")
-        n_columns = p.type.shape[-1]
+        n_columns = p.shape.eval()[-1]
         p = pt.concatenate(
             [
                 pt.shape_padright(p[..., 0]),
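The one-line change above swaps a static-shape lookup for a runtime one: p.type.shape holds only what PyTensor can infer at graph-construction time and may contain None, while p.shape.eval() evaluates the symbolic shape to concrete integers. A quick illustration, where the shared variable is a stand-in for a shape PyTensor can't infer statically:

import pytensor
import pytensor.tensor as pt

# Column count held in a shared variable, so static inference can't see it.
n_thresholds = pytensor.shared(3)
p = pt.ones((10, n_thresholds))

print(p.type.shape[-1])    # None: the old lookup broke when the shape wasn't static
print(p.shape.eval()[-1])  # 3: the runtime value the new code reads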
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -21,7 +21,7 @@ dependencies = [
     "formulae>=0.5.3",
     "graphviz",
     "pandas>=1.0.0",
-    "pymc>=5.12.0,<5.13.0",
+    "pymc>=5.13.0",
 ]
 
 [project.optional-dependencies]
