Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove samples and keep_size from sample_posterior_predictive #6029

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 13 additions & 49 deletions pymc/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from pymc.backends.base import BaseTrace, MultiTrace
from pymc.backends.ndarray import NDArray
from pymc.blocking import DictToArrayBijection
from pymc.exceptions import IncorrectArgumentsError, SamplingError
from pymc.exceptions import SamplingError
from pymc.initial_point import (
PointType,
StartDict,
Expand Down Expand Up @@ -1769,10 +1769,8 @@ def expand(node):

def sample_posterior_predictive(
trace,
samples: Optional[int] = None,
model: Optional[Model] = None,
var_names: Optional[List[str]] = None,
keep_size: Optional[bool] = None,
random_seed: RandomState = None,
progressbar: bool = True,
return_inferencedata: bool = True,
Expand All @@ -1788,25 +1786,11 @@ def sample_posterior_predictive(
trace : backend, list, xarray.Dataset, arviz.InferenceData, or MultiTrace
Trace generated from MCMC sampling, or a list of dicts (eg. points or from find_MAP()),
or xarray.Dataset (eg. InferenceData.posterior or InferenceData.prior)
samples : int
Number of posterior predictive samples to generate. Defaults to one posterior predictive
sample per posterior sample, that is, the number of draws times the number of chains.

It is not recommended to modify this value; when modified, some chains may not be
represented in the posterior predictive sample. Instead, in cases when generating
posterior predictive samples is too expensive to do it once per posterior sample,
the recommended approach is to thin the ``trace`` argument
before passing it to ``sample_posterior_predictive``. In such cases it
might be advisable to set ``extend_inferencedata`` to ``False`` and extend
the inferencedata manually afterwards.
model : Model (optional if in ``with`` context)
Model to be used to generate the posterior predictive samples. It will
generally be the model used to generate the ``trace``, but it doesn't need to be.
var_names : Iterable[str]
Names of variables for which to compute the posterior predictive samples.
keep_size : bool, default True
Force posterior predictive sample to have the same shape as posterior and sample stats
data: ``(nchains, ndraws, ...)``. Overrides samples parameter.
random_seed : int, RandomState or Generator, optional
Seed for the random number generator.
progressbar : bool
Expand Down Expand Up @@ -1882,38 +1866,18 @@ def sample_posterior_predictive(
else:
raise TypeError(f"Unsupported type for `trace` argument: {type(trace)}.")

if keep_size is None:
# This will allow users to set return_inferencedata=False and
# automatically get the old behaviour instead of needing to
# set both return_inferencedata and keep_size to False
keep_size = return_inferencedata

if keep_size and samples is not None:
raise IncorrectArgumentsError(
"Should not specify both keep_size and samples arguments. "
"See the docstring of the samples argument for more details."
if isinstance(_trace, MultiTrace):
samples = sum(len(v) for v in _trace._straces.values())
elif isinstance(_trace, list):
# this is a list of points
samples = len(_trace)
else:
raise TypeError(
"Do not know how to compute number of samples for trace argument of type %s"
% type(_trace)
)

if samples is None:
if isinstance(_trace, MultiTrace):
samples = sum(len(v) for v in _trace._straces.values())
elif isinstance(_trace, list):
# this is a list of points
samples = len(_trace)
else:
raise TypeError(
"Do not know how to compute number of samples for trace argument of type %s"
% type(_trace)
)

assert samples is not None
if samples < len_trace * nchain:
warnings.warn(
"samples parameter is smaller than nchains times ndraws, some draws "
"and/or chains may not be represented in the returned posterior "
"predictive sample",
stacklevel=2,
)

model = modelcontext(model)

Expand Down Expand Up @@ -2001,9 +1965,9 @@ def sample_posterior_predictive(
pass

ppc_trace = ppc_trace_t.trace_dict
if keep_size:
for k, ary in ppc_trace.items():
ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))

for k, ary in ppc_trace.items():
ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))

if not return_inferencedata:
return ppc_trace
Expand Down
26 changes: 3 additions & 23 deletions pymc/tests/backends/test_arviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_predictions_inference_data(
with data.model:
prior = pm.sample_prior_predictive(return_inferencedata=False)
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)

idata = to_inference_data(
Expand All @@ -111,7 +111,7 @@ def make_predictions_inference_data(
) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
with data.model:
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)
idata = predictions_to_inference_data(
posterior_predictive,
Expand Down Expand Up @@ -190,7 +190,7 @@ def test_predictions_to_idata_new(self, data, eight_schools_params):
def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
with data.model:
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)
inference_data = to_inference_data(
trace=data.obj,
Expand All @@ -204,26 +204,6 @@ def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools
[obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
)

def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
with data.model:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", ".*smaller than nchains times ndraws.*", UserWarning
)
posterior_predictive = pm.sample_posterior_predictive(
data.obj, 370, return_inferencedata=False, keep_size=False
)
with pytest.warns(UserWarning, match="shape of variables"):
inference_data = to_inference_data(
trace=data.obj,
posterior_predictive=posterior_predictive,
coords={"school": np.arange(eight_schools_params["J"])},
dims={"theta": ["school"], "eta": ["school"]},
)

shape = inference_data.posterior_predictive.obs.shape
assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])

def test_posterior_predictive_thinned(self, data):
with data.model:
draws = 20
Expand Down
21 changes: 14 additions & 7 deletions pymc/tests/distributions/test_mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,18 +546,18 @@ def test_single_poisson_predictive_sampling_shape(self):
with model:
prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
ppc = sample_posterior_predictive(
[self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
n_samples * [self.get_inital_point(model)], return_inferencedata=False
)

assert prior["like0"].shape == (n_samples, 20)
assert prior["like1"].shape == (n_samples, 20)
assert prior["like2"].shape == (n_samples, 20)
assert prior["like3"].shape == (n_samples, 20)

assert ppc["like0"].shape == (n_samples, 20)
assert ppc["like1"].shape == (n_samples, 20)
assert ppc["like2"].shape == (n_samples, 20)
assert ppc["like3"].shape == (n_samples, 20)
assert ppc["like0"].shape == (1, n_samples, 20)
assert ppc["like1"].shape == (1, n_samples, 20)
assert ppc["like2"].shape == (1, n_samples, 20)
assert ppc["like3"].shape == (1, n_samples, 20)

def test_list_mvnormals_predictive_sampling_shape(self):
N = 100 # number of data points
Expand Down Expand Up @@ -592,9 +592,16 @@ def test_list_mvnormals_predictive_sampling_shape(self):
with model:
prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
ppc = sample_posterior_predictive(
[self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
n_samples * [self.get_inital_point(model)], return_inferencedata=False
)
assert ppc["x_obs"].shape == (n_samples,) + X.shape
assert (
ppc["x_obs"].shape
== (
1,
n_samples,
)
+ X.shape
)
assert prior["x_obs"].shape == (n_samples,) + X.shape
assert prior["mu0"].shape == (n_samples, D)
assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
Expand Down
6 changes: 2 additions & 4 deletions pymc/tests/distributions/test_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ def test_one_gaussian(self):
with self.SMABC_test:
trace = pm.sample_smc(draws=1000, chains=1, return_inferencedata=False)
pr_p = pm.sample_prior_predictive(1000, return_inferencedata=False)
po_p = pm.sample_posterior_predictive(
trace, keep_size=False, return_inferencedata=False
)
po_p = pm.sample_posterior_predictive(trace, return_inferencedata=False)

assert abs(self.data.mean() - trace["a"].mean()) < 0.05
assert abs(self.data.std() - trace["b"].mean()) < 0.05
Expand All @@ -91,7 +89,7 @@ def test_one_gaussian(self):
assert abs(0 - pr_p["s"].mean()) < 0.15
assert abs(1.4 - pr_p["s"].std()) < 0.10

assert po_p["s"].shape == (1000, 1000)
assert po_p["s"].shape == (1, 1000, 1000)
assert abs(self.data.mean() - po_p["s"].mean()) < 0.10
assert abs(self.data.std() - po_p["s"].std()) < 0.10

Expand Down
13 changes: 9 additions & 4 deletions pymc/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,13 +1263,18 @@ def test_interval_missing_observations():

# Make sure that the observed values are newly generated samples and that
# the observed and deterministic matche
pp_trace = pm.sample_posterior_predictive(
trace, return_inferencedata=False, keep_size=False
pp_idata = pm.sample_posterior_predictive(trace)
pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose(
"sample", ...
)
assert np.all(np.var(pp_trace["theta1"], 0) > 0.0)
assert np.all(np.var(pp_trace["theta2"], 0) > 0.0)
assert np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]) == 0.0
assert np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]) == 0.0
assert np.isclose(
np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]), 0
)
assert np.isclose(
np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]), 0
)


def test_double_counting():
Expand Down
Loading