From 6fec04ff3614b017cf9f23895a580b3220e73965 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Capretto?=
Date: Fri, 13 Jan 2023 17:57:19 -0300
Subject: [PATCH] Add default handler for posterior predictive distribution
 (#625)

* Ideas on how to generalize pps

* Make posterior predictive more general

* Remove commented code

* Make predict work with multivariate families

* Add docstrings

* Update changelog

* Make posterior predictive more robust. Make Categorical a univariate
  family. Remove cyclic import issues

* For some things, the Categorical family must be treated as multivariate

---
 Changelog.md                      |   1 +
 bambi/backend/model_components.py |   3 +-
 bambi/backend/terms.py            |   7 +-
 bambi/defaults/families.py        |   3 +-
 bambi/families/family.py          | 129 +++++++++++++++++++++++++-
 bambi/families/multivariate.py    | 100 +--------------------
 bambi/families/univariate.py      | 144 ++++++++++++------------------
 bambi/model_components.py         |   2 +-
 tests/test_predict.py             |  12 ++-
 9 files changed, 210 insertions(+), 191 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 08f4367d1..60f0e41e3 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -5,6 +5,7 @@
 ### New features
 
 * Refactored the codebase to support distributional models (#607)
+* Added a default method to handle posterior predictive sampling for custom families (#625)
 
 ### Maintenance and fixes
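The practical payoff of this change is that a custom family assembled from Bambi's public building blocks no longer needs its own `posterior_predictive()` override. A minimal sketch of that workflow is shown below; the data, the `"my-t"` name, and the dict-of-links argument are illustrative assumptions based on the post-#607 API, not part of this patch:

```python
import bambi as bmb
import numpy as np
import pandas as pd

# Hypothetical data
rng = np.random.default_rng(1234)
data = pd.DataFrame({"x": rng.normal(size=100)})
data["y"] = 2 + 1.5 * data["x"] + rng.normal(scale=0.5, size=100)

# A Student-t family rebuilt from public pieces. No posterior_predictive()
# override is defined anywhere: the new default handler takes care of it.
likelihood = bmb.Likelihood("StudentT", params=["mu", "sigma", "nu"], parent="mu")
family = bmb.Family("my-t", likelihood, {"mu": "identity", "sigma": "log", "nu": "log"})
priors = {"sigma": bmb.Prior("HalfNormal", sigma=1), "nu": bmb.Prior("Gamma", alpha=2, beta=0.1)}

model = bmb.Model("y ~ x", data, family=family, priors=priors)
idata = model.fit(tune=100, draws=100)
model.predict(idata, kind="pps")  # served by the default Family.posterior_predictive()
```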
diff --git a/bambi/backend/model_components.py b/bambi/backend/model_components.py
index ac22a9f61..6fe2f8568 100644
--- a/bambi/backend/model_components.py
+++ b/bambi/backend/model_components.py
@@ -5,6 +5,7 @@
 from bambi.backend.terms import CommonTerm, GroupSpecificTerm, InterceptTerm, ResponseTerm
 from bambi.backend.utils import get_distribution
 from bambi.families.multivariate import MultivariateFamily
+from bambi.families.univariate import Categorical
 from bambi.utils import get_aliased_name
 
 
@@ -118,7 +119,7 @@ def build_group_specific_terms(self, pymc_backend, bmb_model):
             if predictor.ndim > 1:
                 for col in range(predictor.shape[1]):
                     self.output += coef[:, col] * predictor[:, col]
-            elif isinstance(bmb_model.family, MultivariateFamily):
+            elif isinstance(bmb_model.family, (MultivariateFamily, Categorical)):
                 self.output += coef * predictor[:, np.newaxis]
             else:
                 self.output += coef * predictor
diff --git a/bambi/backend/terms.py b/bambi/backend/terms.py
index 874ba3bc2..07c5b2ba5 100644
--- a/bambi/backend/terms.py
+++ b/bambi/backend/terms.py
@@ -5,6 +5,7 @@
 
 from bambi.backend.utils import has_hyperprior, get_distribution
 from bambi.families.multivariate import MultivariateFamily
+from bambi.families.univariate import Categorical
 from bambi.priors import Prior
 from bambi.utils import get_aliased_name
 
@@ -36,7 +37,7 @@ def build(self, spec):
 
         # Dims of the response variable
         response_dims = []
-        if isinstance(spec.family, MultivariateFamily):
+        if isinstance(spec.family, (MultivariateFamily, Categorical)):
             response_dims = list(spec.response_component.response_term.coords)
             response_dims_n = len(spec.response_component.response_term.coords[response_dims[0]])
             # Arguments may be of shape (a,) but we need them to be of shape (a, b)
@@ -99,7 +100,7 @@ def build(self, spec):
 
         # Dims of the response variable (e.g. categorical)
         response_dims = []
-        if isinstance(spec.family, MultivariateFamily):
+        if isinstance(spec.family, (MultivariateFamily, Categorical)):
             response_dims = list(spec.response_component.response_term.coords)
 
         dims = list(self.coords) + response_dims
@@ -172,7 +173,7 @@ def build(self, spec):
         dist = get_distribution(self.term.prior.name)
         label = self.name
         # Prepends one dimension if response is multivariate
-        if isinstance(spec.family, MultivariateFamily):
+        if isinstance(spec.family, (MultivariateFamily, Categorical)):
             dims = list(spec.response_component.response_term.coords)
             dist = dist(label, dims=dims, **self.term.prior.args)[np.newaxis, :]
         else:
diff --git a/bambi/defaults/families.py b/bambi/defaults/families.py
index a259603a8..567611ddb 100644
--- a/bambi/defaults/families.py
+++ b/bambi/defaults/families.py
@@ -4,6 +4,7 @@
     Bernoulli,
     Beta,
     Binomial,
+    Categorical,
     Gamma,
     Gaussian,
     NegativeBinomial,
@@ -13,7 +14,7 @@
     VonMises,
     Wald,
 )
-from bambi.families.multivariate import Categorical, Multinomial
+from bambi.families.multivariate import Multinomial
 
 
 # fmt: off
diff --git a/bambi/families/family.py b/bambi/families/family.py
index b3d5ab5c8..f3b8cec26 100644
--- a/bambi/families/family.py
+++ b/bambi/families/family.py
@@ -1,7 +1,11 @@
 from typing import Dict, Union
 
+import numpy as np
+import pymc as pm
+import xarray as xr
+
 from bambi.families.link import Link
-from bambi.utils import get_auxiliary_parameters
+from bambi.utils import get_auxiliary_parameters, get_aliased_name
 
 
 class Family:
@@ -101,9 +105,132 @@ def set_default_priors(self, priors):
         priors = {k: v for k, v in priors.items() if k in auxiliary_parameters}
         self.default_priors.update(priors)
 
+    def posterior_predictive(self, model, posterior, **kwargs):  # pylint: disable = unused-argument
+        """Get draws from the posterior predictive distribution
+
+        This method works for almost all families. It grabs the draws for the parameters
+        needed by the response distribution and then samples from the posterior predictive
+        distribution using `pm.draw()`. It won't work when the response distribution requires
+        parameters that are not available in `posterior`.
+
+        Parameters
+        ----------
+        model : bambi.Model
+            The model
+        posterior : xr.Dataset
+            The xarray dataset that contains the draws for all the parameters in the posterior.
+            It must contain the parameters that are needed in the distribution of the response,
+            or the parameters needed to derive them.
+
+        Returns
+        -------
+        xr.DataArray
+            A data array with the draws from the posterior predictive distribution
+        """
+        response_dist = get_response_dist(model.family)
+        params = model.family.likelihood.params
+        response_aliased_name = get_aliased_name(model.response_component.response_term)
+
+        kwargs = {}
+        output_dataset_list = []
+
+        # In the posterior xr.Dataset we need to consider aliases,
+        # but we don't use aliases when passing kwargs to the PyMC distribution
+        for param in params:
+            # Extract posterior draws for the parent parameter
+            if param == model.family.likelihood.parent:
+                component = model.components[model.response_name]
+                var_name = f"{response_aliased_name}_mean"
+                kwargs[param] = posterior[var_name].to_numpy()
+                output_dataset_list.append(posterior[var_name])
+            else:
+                # Extract posterior draws for non-parent parameters
+                component = model.components[param]
+                component_aliased_name = component.alias if component.alias else param
+                var_name = f"{response_aliased_name}_{component_aliased_name}"
+                if var_name in posterior:
+                    kwargs[param] = posterior[var_name].to_numpy()
+                    output_dataset_list.append(posterior[var_name])
+                elif hasattr(component, "prior") and isinstance(component.prior, (int, float)):
+                    kwargs[param] = np.asarray(component.prior)
+
+        # Determine the maximum number of dimensions among the parameter arrays
+        ndims_max = max(x.ndim for x in kwargs.values())
+
+        # Append dimensions when needed. Required to make `pm.draw()` work.
+        for key, values in kwargs.items():
+            kwargs[key] = expand_array(values, ndims_max)
+
+        # NOTE: Wouldn't it be better to always use parametrizations compatible with PyMC?
+        # The current approach allows more flexibility, but it's more painful.
+        if hasattr(model.family, "transform_backend_kwargs"):
+            kwargs = model.family.transform_backend_kwargs(kwargs)
+
+        output_array = pm.draw(response_dist.dist(**kwargs))
+        output_coords_all = xr.merge(output_dataset_list).coords
+
+        if hasattr(model.family, "KIND") and model.family.KIND == "Multivariate":
+            coord_names = (
+                "chain",
+                "draw",
+                response_aliased_name + "_obs",
+                response_aliased_name + "_mean_dim",
+            )
+        else:  # Assume it's a univariate family
+            coord_names = ("chain", "draw", response_aliased_name + "_obs")
+
+        output_coords = {}
+        for coord_name in coord_names:
+            output_coords[coord_name] = output_coords_all[coord_name]
+        return xr.DataArray(output_array, coords=output_coords)
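The heart of the method above is a two-step pattern: align every parameter's posterior draws on a common set of axes, then let PyMC sample the response distribution once, evaluated at those draws. Outside of Bambi, with made-up shapes (2 chains × 100 draws × 50 observations), the pattern looks like this:

```python
import numpy as np
import pymc as pm

# Posterior draws for a Gaussian response: mu varies per observation,
# sigma is a scalar per (chain, draw) pair.
mu = np.random.normal(size=(2, 100, 50))         # (chain, draw, obs)
sigma = np.abs(np.random.normal(size=(2, 100)))  # (chain, draw)

# What expand_array() does below: append a trailing axis so sigma
# broadcasts against mu inside the PyMC distribution.
sigma = sigma[..., np.newaxis]                   # (chain, draw, 1)

# A single pm.draw() yields one posterior predictive sample per posterior
# draw; no family-specific sampling code is required.
pps = pm.draw(pm.Normal.dist(mu=mu, sigma=sigma))
print(pps.shape)  # (2, 100, 50)
```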
+
     def __str__(self):
         msg_list = [f"Family: {self.name}", f"Likelihood: {self.likelihood}", f"Link: {self.link}"]
         return "\n".join(msg_list)
 
     def __repr__(self):
         return self.__str__()
+
+
+def get_response_dist(family):
+    """Get the PyMC distribution for the response
+
+    Parameters
+    ----------
+    family : bambi.Family
+        The family for which the response distribution is wanted
+
+    Returns
+    -------
+    pm.Distribution
+        The response distribution
+    """
+    if family.likelihood.dist:
+        dist = family.likelihood.dist
+    else:
+        dist = getattr(pm, family.likelihood.name)
+    return dist
+
+
+def expand_array(x, ndim):
+    """Add dimensions to an array to match the number of desired dimensions
+
+    If x.ndim < ndim, it adds ndim - x.ndim dimensions after the last axis. If not, the array
+    is left untouched.
+
+    Parameters
+    ----------
+    x : np.ndarray
+        The array
+    ndim : int
+        The number of desired dimensions
+
+    Returns
+    -------
+    np.ndarray
+        The array with the expanded dimensions
+    """
+    if x.ndim == ndim:
+        return x
+    dims_to_expand = tuple(range(ndim - 1, x.ndim - 1, -1))
+    return np.expand_dims(x, dims_to_expand)
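For intuition on the helper just added: `expand_array()` only ever appends trailing axes, which is exactly what NumPy broadcasting needs to line up the leading `(chain, draw)` axes. A standalone demonstration, copying the function body verbatim:

```python
import numpy as np

def expand_array(x, ndim):
    # Same logic as the helper above
    if x.ndim == ndim:
        return x
    dims_to_expand = tuple(range(ndim - 1, x.ndim - 1, -1))
    return np.expand_dims(x, dims_to_expand)

a = np.zeros((2, 100))           # e.g. a scalar parameter: (chain, draw)
print(expand_array(a, 3).shape)  # (2, 100, 1): broadcasts against (2, 100, 50)

b = np.zeros(())                 # e.g. a constant prior turned into a 0-d array
print(expand_array(b, 3).shape)  # (1, 1, 1): broadcasts against anything
```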
diff --git a/bambi/families/multivariate.py b/bambi/families/multivariate.py
index 17f2dc5d6..d8ac71364 100644
--- a/bambi/families/multivariate.py
+++ b/bambi/families/multivariate.py
@@ -1,94 +1,14 @@
 # pylint: disable=unused-argument
-import pytensor.tensor as pt
 import numpy as np
 import xarray as xr
+import pytensor.tensor as pt
 
 from bambi.families.family import Family
 from bambi.utils import extract_argument_names, extra_namespace, get_aliased_name
 
 
 class MultivariateFamily(Family):
-    def posterior_predictive(self, model, posterior):
-        raise NotImplementedError
-
-
-class Categorical(MultivariateFamily):
-    SUPPORTED_LINKS = {"p": ["softmax"]}
-    UFUNC_KWARGS = {"axis": -1}
-
-    def transform_linear_predictor(self, model, linear_predictor):
-        response_name = get_aliased_name(model.response_component.response_term)
-        response_levels_dim = response_name + "_dim"
-        linear_predictor = linear_predictor.pad({response_levels_dim: (1, 0)}, constant_values=0)
-        return linear_predictor
-
-    def transform_coords(self, model, mean):
-        # The mean has the reference level in the dimension, a new name is needed
-        response_name = get_aliased_name(model.response_component.response_term)
-        response_levels_dim = response_name + "_dim"
-        response_levels_dim_complete = response_name + "_mean_dim"
-        levels_complete = model.response_component.response_term.levels
-        mean = mean.rename({response_levels_dim: response_levels_dim_complete})
-        mean = mean.assign_coords({response_levels_dim_complete: levels_complete})
-        return mean
-
-    # NOTE: Check posterior predictive
-    def posterior_predictive(self, model, posterior, **kwargs):
-        def draw_categorical_samples(probability_matrix, items):
-            # https://stackoverflow.com/questions/34187130
-            # probability_matrix is a matrix of shape (n_chain * n_draw, n_levels)
-            cumsum = probability_matrix.cumsum(axis=1)
-            idx = np.random.rand(probability_matrix.shape[0])[:, np.newaxis]
-            idx = (cumsum < idx).sum(axis=1)
-            return items[idx]
-
-        response_name = get_aliased_name(model.response_component.response_term)
-        response_dim = response_name + "_obs"
-        response_levels = np.arange(len(model.response_component.response_term.levels))
-        mean = posterior[response_name + "_mean"]
-
-        mean = mean.to_numpy()
-        shape = mean.shape
-
-        # Stack chains and draws
-        mean = mean.reshape((mean.shape[0] * mean.shape[1], mean.shape[2], mean.shape[3]))
-        draws_n = mean.shape[0]
-        obs_n = mean.shape[1]
-
-        pps = np.empty((draws_n, obs_n), dtype=int)
-        for idx in range(obs_n):
-            pps[:, idx] = draw_categorical_samples(mean[:, idx, :], response_levels)
-
-        pps = pps.reshape((shape[0], shape[1], obs_n))
-        pps = xr.DataArray(
-            pps,
-            coords={
-                "chain": np.arange(shape[0]),
-                "draw": np.arange(shape[1]),
-                response_dim: np.arange(obs_n),
-            },
-        )
-        return pps
-
-    def get_data(self, response):
-        return np.nonzero(response.term.data)[1]
-
-    def get_coords(self, response):
-        name = response.name + "_dim"
-        return {name: [level for level in response.levels if level != response.reference]}
-
-    def get_reference(self, response):
-        return get_reference_level(response.term)
-
-    @staticmethod
-    def transform_backend_nu(nu, data):
-        # Add column of zeros to the linear predictor for the reference level (the first one)
-        shape = (data.shape[0], 1)
-
-        # The first line makes sure the intercept-only models work
-        nu = np.ones(shape) * nu  # (response_levels, ) -> (n, response_levels)
-        nu = pt.concatenate([np.zeros(shape), nu], axis=1)
-        return nu
+    KIND = "Multivariate"
 
 
 class Multinomial(MultivariateFamily):
@@ -176,19 +96,3 @@ def transform_backend_nu(nu, data):
         nu = np.ones(shape) * nu  # (response_levels, ) -> (n, response_levels)
         nu = pt.concatenate([np.zeros(shape), nu], axis=1)
         return nu
-
-
-# pylint: disable = protected-access
-def get_reference_level(term):
-    if term.kind != "categoric":
-        return None
-
-    if term.levels is None:
-        return None
-
-    levels = term.levels
-    intermediate_data = term.components[0]._intermediate_data
-    if hasattr(intermediate_data, "_contrast"):
-        return intermediate_data._contrast.reference
-
-    return levels[0]
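The `transform_backend_nu` logic kept in `Multinomial` (and mirrored in the new univariate `Categorical` below) encodes the usual identifiability trick: the linear predictor is only estimated for the K − 1 non-reference levels, and a column of zeros is prepended so the softmax pins the reference level. A NumPy-only sketch of the idea (the real code uses `pytensor.tensor`):

```python
import numpy as np

def softmax(eta, axis=-1):
    eta = eta - eta.max(axis=axis, keepdims=True)
    out = np.exp(eta)
    return out / out.sum(axis=axis, keepdims=True)

nu = np.array([[0.8, -0.3], [1.5, 0.2]])  # (n, K - 1): non-reference levels only

# Mirror transform_backend_nu(): prepend a zeros column for the reference level
nu_full = np.concatenate([np.zeros((nu.shape[0], 1)), nu], axis=1)  # (n, K)

p = softmax(nu_full)
print(p.round(3))     # column 0 is the reference level
print(p.sum(axis=1))  # rows sum to 1
```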
diff --git a/bambi/families/univariate.py b/bambi/families/univariate.py
index 21bf1c70a..f177cc477 100644
--- a/bambi/families/univariate.py
+++ b/bambi/families/univariate.py
@@ -1,14 +1,13 @@
 import numpy as np
 import xarray as xr
-from scipy import stats
+import pytensor.tensor as pt
 
 from bambi.families.family import Family
 from bambi.utils import get_aliased_name
 
 
 class UnivariateFamily(Family):
-    def posterior_predictive(self, model, posterior, **kwargs):
-        raise NotImplementedError
+    KIND = "Univariate"
 
 
 class AsymmetricLaplace(UnivariateFamily):
@@ -19,24 +18,10 @@ class AsymmetricLaplace(UnivariateFamily):
         "q": ["logit", "probit", "cloglog"],
     }
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        """Sample from posterior predictive distribution"""
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        b = posterior[response_name + "_b"]
-        kappa = posterior[response_name + "_kappa"]
-        return xr.apply_ufunc(stats.laplace_asymmetric.rvs, kappa, mean, b)
-
 
 class Bernoulli(UnivariateFamily):
     SUPPORTED_LINKS = {"p": ["identity", "logit", "probit", "cloglog"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        """Sample from posterior predictive distribution"""
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        return xr.apply_ufunc(np.random.binomial, 1, mean)
-
     def get_data(self, response):
         if response.term.data.ndim == 1:
             return response.term.data
@@ -52,14 +37,6 @@ def get_success_level(self, response):
 class Beta(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["logit", "probit", "cloglog"], "kappa": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        kappa = posterior[response_name + "_kappa"]
-        alpha = mean * kappa
-        beta = (1 - mean) * kappa
-        return xr.apply_ufunc(np.random.beta, alpha, beta)
-
     @staticmethod
     def transform_backend_kwargs(kwargs):
         mu = kwargs.pop("mu")
@@ -92,15 +69,49 @@ def transform_backend_kwargs(kwargs):
         return kwargs
 
 
-class Gamma(UnivariateFamily):
-    SUPPORTED_LINKS = {"mu": ["identity", "log", "inverse"], "alpha": ["log"]}
+class Categorical(UnivariateFamily):
+    SUPPORTED_LINKS = {"p": ["softmax"]}
+    UFUNC_KWARGS = {"axis": -1}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
+    def transform_linear_predictor(self, model, linear_predictor):
         response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        alpha = posterior[response_name + "_alpha"]
-        beta = alpha / mean
-        return xr.apply_ufunc(np.random.gamma, alpha, 1 / beta)
+        response_levels_dim = response_name + "_dim"
+        linear_predictor = linear_predictor.pad({response_levels_dim: (1, 0)}, constant_values=0)
+        return linear_predictor
+
+    def transform_coords(self, model, mean):
+        # The mean has the reference level in the dimension, a new name is needed
+        response_name = get_aliased_name(model.response_component.response_term)
+        response_levels_dim = response_name + "_dim"
+        response_levels_dim_complete = response_name + "_mean_dim"
+        levels_complete = model.response_component.response_term.levels
+        mean = mean.rename({response_levels_dim: response_levels_dim_complete})
+        mean = mean.assign_coords({response_levels_dim_complete: levels_complete})
+        return mean
+
+    def get_data(self, response):
+        return np.nonzero(response.term.data)[1]
+
+    def get_coords(self, response):
+        name = response.name + "_dim"
+        return {name: [level for level in response.levels if level != response.reference]}
+
+    def get_reference(self, response):
+        return get_reference_level(response.term)
+
+    @staticmethod
+    def transform_backend_nu(nu, data):
+        # Add column of zeros to the linear predictor for the reference level (the first one)
+        shape = (data.shape[0], 1)
+
+        # The first line makes sure intercept-only models work
+        nu = np.ones(shape) * nu  # (response_levels, ) -> (n, response_levels)
+        nu = pt.concatenate([np.zeros(shape), nu], axis=1)
+        return nu
+
+
+class Gamma(UnivariateFamily):
+    SUPPORTED_LINKS = {"mu": ["identity", "log", "inverse"], "alpha": ["log"]}
 
     @staticmethod
     def transform_backend_kwargs(kwargs):
@@ -114,83 +125,30 @@ def transform_backend_kwargs(kwargs):
 class Gaussian(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "log", "inverse"], "sigma": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        "Sample from posterior predictive distribution"
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        sigma = posterior[response_name + "_sigma"]
-        return xr.apply_ufunc(np.random.normal, mean, sigma)
-
 
 class NegativeBinomial(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "log", "cloglog"], "alpha": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        n = posterior[response_name + "_alpha"]
-        p = n / (mean + n)
-        return xr.apply_ufunc(np.random.negative_binomial, n, p)
-
 
 class Laplace(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "log", "inverse"], "b": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        "Sample from posterior predictive distribution"
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        b = posterior[response_name + "_b"]
-        return xr.apply_ufunc(np.random.laplace, mean, b)
-
 
 class Poisson(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        return xr.apply_ufunc(np.random.poisson, mean)
-
 
 class StudentT(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "log", "inverse"], "sigma": ["log"], "nu": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        sigma = posterior[response_name + "_sigma"]
-        nu_component = model.components["nu"]
-
-        # Constant component with fixed value
-        if hasattr(nu_component, "prior") and isinstance(nu_component.prior, (int, float)):
-            nu = nu_component.prior
-        # Either constant or distributional, but non-constant value
-        else:
-            nu = posterior[response_name + "_nu"]
-
-        return xr.apply_ufunc(stats.t.rvs, nu, mean, sigma)
-
 
 class VonMises(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["identity", "tan_2"], "kappa": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        kappa = posterior[response_name + "_kappa"]
-        return xr.apply_ufunc(np.random.vonmises, mean, kappa)
-
 
 class Wald(UnivariateFamily):
     SUPPORTED_LINKS = {"mu": ["inverse", "inverse_squared", "identity", "log"], "lam": ["log"]}
 
-    def posterior_predictive(self, model, posterior, **kwargs):
-        response_name = get_aliased_name(model.response_component.response_term)
-        mean = posterior[response_name + "_mean"]
-        lam = posterior[response_name + "_lam"]
-        return xr.apply_ufunc(np.random.wald, mean, lam)
-
 
 # pylint: disable = protected-access
 def get_success_level(term):
@@ -206,3 +164,19 @@ def get_success_level(term):
         return intermediate_data._contrast.reference
 
     return levels[0]
+
+
+# pylint: disable = protected-access
+def get_reference_level(term):
+    if term.kind != "categoric":
+        return None
+
+    if term.levels is None:
+        return None
+
+    levels = term.levels
+    intermediate_data = term.components[0]._intermediate_data
+    if hasattr(intermediate_data, "_contrast"):
+        return intermediate_data._contrast.reference
+
+    return levels[0]
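With `Categorical` relocated here and `pm.Categorical` resolvable through `get_response_dist()`, the hand-rolled cumulative-sum sampler deleted from `multivariate.py` reduces to a single `pm.draw()` call inside the default handler. A sketch with simulated posterior means (shapes are assumptions):

```python
import numpy as np
import pymc as pm

# Simulated posterior mean draws for a 4-level response:
# (chain, draw, obs, level), with the last axis summing to 1.
p = np.random.dirichlet(np.ones(4), size=(2, 100, 50))

# pm.Categorical treats the last axis as the event axis, so one pm.draw()
# replaces the manual inverse-CDF loop that this patch removes.
pps = pm.draw(pm.Categorical.dist(p=p))
print(pps.shape)       # (2, 100, 50)
print(np.unique(pps))  # [0 1 2 3]
```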
diff --git a/bambi/model_components.py b/bambi/model_components.py
index 2c55d72a0..6f04eb4e0 100644
--- a/bambi/model_components.py
+++ b/bambi/model_components.py
@@ -162,7 +162,7 @@ def predict(self, idata, data=None, include_group_specific=True):
         to_stack_dims = ("chain", "draw")
         design_matrix_dims = (response_dim, "__variables__")
 
-        if isinstance(self.spec.family, multivariate.MultivariateFamily):
+        if isinstance(self.spec.family, (multivariate.MultivariateFamily, univariate.Categorical)):
             to_stack_dims = to_stack_dims + (response_levels_dim,)
             linear_predictor_dims = linear_predictor_dims + (response_levels_dim,)
diff --git a/tests/test_predict.py b/tests/test_predict.py
index ae1ba6924..22c2c807c 100644
--- a/tests/test_predict.py
+++ b/tests/test_predict.py
@@ -189,6 +189,16 @@ def test_predict_t(data_numeric_xy):
     model.predict(idata, kind="mean", data=data.iloc[:20, :])
     model.predict(idata, kind="pps", data=data.iloc[:20, :])
 
+    # A case where the prior for one of the parameters is constant
+    model = Model("y ~ x", data, family="t", priors={"nu": 4})
+    idata = model.fit(tune=100, draws=100)
+
+    model.predict(idata, kind="mean")
+    model.predict(idata, kind="pps")
+
+    model.predict(idata, kind="mean", data=data.iloc[:20, :])
+    model.predict(idata, kind="pps", data=data.iloc[:20, :])
+
 
 def test_predict_wald(data_gamma):
     data = data_gamma
@@ -233,7 +243,7 @@ def test_posterior_predictive_categorical(inhaler):
     model = Model("rating ~ period", data=inhaler, family="categorical")
     idata = model.fit(tune=100, draws=100)
     model.predict(idata, kind="pps")
-    pps = idata.posterior_predictive["rating"].values
+    pps = idata.posterior_predictive["rating"].to_numpy()
 
     assert pps.shape[-1] == inhaler.shape[0]
     assert (np.unique(pps) == [0, 1, 2, 3]).all()
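The new `test_predict_t` case exercises the constant-parameter branch of the default handler: with `priors={"nu": 4}` there are no posterior draws for `nu`, so the handler falls back to the scalar (`np.asarray(component.prior)`) and relies on broadcasting. In isolation, with assumed shapes:

```python
import numpy as np
import pymc as pm

mu = np.random.normal(size=(2, 100, 50))                          # (chain, draw, obs)
sigma = np.abs(np.random.normal(size=(2, 100)))[..., np.newaxis]  # (chain, draw, 1)
nu = np.asarray(4)  # constant prior: a 0-d array that broadcasts against everything

pps = pm.draw(pm.StudentT.dist(nu=nu, mu=mu, sigma=sigma))
print(pps.shape)  # (2, 100, 50)
```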