Skip to content

Commit

Permalink
Remove samples and keep_size from sample_posterior_predictive (#…
Browse files Browse the repository at this point in the history
…6029)

* removed unused imports 

Co-authored-by: vitaliset <carlo_lemos@hotmail.com>

* Update pymc/tests/test_sampling.py

added ricardo's suggestion 2

Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>

* edited `test_mixture.py` and `test_sampling.py`

* try fixing some tests

* try fixing tests

* fix one more test

Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
Co-authored-by: Oriol (ZBook) <oriol.abril.pla@gmail.com>
  • Loading branch information
3 people authored Oct 11, 2022
1 parent 52a1b3c commit d47dac0
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 185 deletions.
62 changes: 13 additions & 49 deletions pymc/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from pymc.backends.base import BaseTrace, MultiTrace
from pymc.backends.ndarray import NDArray
from pymc.blocking import DictToArrayBijection
from pymc.exceptions import IncorrectArgumentsError, SamplingError
from pymc.exceptions import SamplingError
from pymc.initial_point import (
PointType,
StartDict,
Expand Down Expand Up @@ -1769,10 +1769,8 @@ def expand(node):

def sample_posterior_predictive(
trace,
samples: Optional[int] = None,
model: Optional[Model] = None,
var_names: Optional[List[str]] = None,
keep_size: Optional[bool] = None,
random_seed: RandomState = None,
progressbar: bool = True,
return_inferencedata: bool = True,
Expand All @@ -1788,25 +1786,11 @@ def sample_posterior_predictive(
trace : backend, list, xarray.Dataset, arviz.InferenceData, or MultiTrace
Trace generated from MCMC sampling, or a list of dicts (eg. points or from find_MAP()),
or xarray.Dataset (eg. InferenceData.posterior or InferenceData.prior)
samples : int
Number of posterior predictive samples to generate. Defaults to one posterior predictive
sample per posterior sample, that is, the number of draws times the number of chains.
It is not recommended to modify this value; when modified, some chains may not be
represented in the posterior predictive sample. Instead, in cases when generating
posterior predictive samples is too expensive to do it once per posterior sample,
the recommended approach is to thin the ``trace`` argument
before passing it to ``sample_posterior_predictive``. In such cases it
might be advisable to set ``extend_inferencedata`` to ``False`` and extend
the inferencedata manually afterwards.
model : Model (optional if in ``with`` context)
Model to be used to generate the posterior predictive samples. It will
generally be the model used to generate the ``trace``, but it doesn't need to be.
var_names : Iterable[str]
Names of variables for which to compute the posterior predictive samples.
keep_size : bool, default True
Force posterior predictive sample to have the same shape as posterior and sample stats
data: ``(nchains, ndraws, ...)``. Overrides samples parameter.
random_seed : int, RandomState or Generator, optional
Seed for the random number generator.
progressbar : bool
Expand Down Expand Up @@ -1882,38 +1866,18 @@ def sample_posterior_predictive(
else:
raise TypeError(f"Unsupported type for `trace` argument: {type(trace)}.")

if keep_size is None:
# This will allow users to set return_inferencedata=False and
# automatically get the old behaviour instead of needing to
# set both return_inferencedata and keep_size to False
keep_size = return_inferencedata

if keep_size and samples is not None:
raise IncorrectArgumentsError(
"Should not specify both keep_size and samples arguments. "
"See the docstring of the samples argument for more details."
if isinstance(_trace, MultiTrace):
samples = sum(len(v) for v in _trace._straces.values())
elif isinstance(_trace, list):
# this is a list of points
samples = len(_trace)
else:
raise TypeError(
"Do not know how to compute number of samples for trace argument of type %s"
% type(_trace)
)

if samples is None:
if isinstance(_trace, MultiTrace):
samples = sum(len(v) for v in _trace._straces.values())
elif isinstance(_trace, list):
# this is a list of points
samples = len(_trace)
else:
raise TypeError(
"Do not know how to compute number of samples for trace argument of type %s"
% type(_trace)
)

assert samples is not None
if samples < len_trace * nchain:
warnings.warn(
"samples parameter is smaller than nchains times ndraws, some draws "
"and/or chains may not be represented in the returned posterior "
"predictive sample",
stacklevel=2,
)

model = modelcontext(model)

Expand Down Expand Up @@ -2001,9 +1965,9 @@ def sample_posterior_predictive(
pass

ppc_trace = ppc_trace_t.trace_dict
if keep_size:
for k, ary in ppc_trace.items():
ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))

for k, ary in ppc_trace.items():
ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))

if not return_inferencedata:
return ppc_trace
Expand Down
26 changes: 3 additions & 23 deletions pymc/tests/backends/test_arviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_predictions_inference_data(
with data.model:
prior = pm.sample_prior_predictive(return_inferencedata=False)
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)

idata = to_inference_data(
Expand All @@ -111,7 +111,7 @@ def make_predictions_inference_data(
) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
with data.model:
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)
idata = predictions_to_inference_data(
posterior_predictive,
Expand Down Expand Up @@ -190,7 +190,7 @@ def test_predictions_to_idata_new(self, data, eight_schools_params):
def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
with data.model:
posterior_predictive = pm.sample_posterior_predictive(
data.obj, keep_size=True, return_inferencedata=False
data.obj, return_inferencedata=False
)
inference_data = to_inference_data(
trace=data.obj,
Expand All @@ -204,26 +204,6 @@ def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools
[obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
)

def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
with data.model:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", ".*smaller than nchains times ndraws.*", UserWarning
)
posterior_predictive = pm.sample_posterior_predictive(
data.obj, 370, return_inferencedata=False, keep_size=False
)
with pytest.warns(UserWarning, match="shape of variables"):
inference_data = to_inference_data(
trace=data.obj,
posterior_predictive=posterior_predictive,
coords={"school": np.arange(eight_schools_params["J"])},
dims={"theta": ["school"], "eta": ["school"]},
)

shape = inference_data.posterior_predictive.obs.shape
assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])

def test_posterior_predictive_thinned(self, data):
with data.model:
draws = 20
Expand Down
21 changes: 14 additions & 7 deletions pymc/tests/distributions/test_mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,18 +546,18 @@ def test_single_poisson_predictive_sampling_shape(self):
with model:
prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
ppc = sample_posterior_predictive(
[self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
n_samples * [self.get_inital_point(model)], return_inferencedata=False
)

assert prior["like0"].shape == (n_samples, 20)
assert prior["like1"].shape == (n_samples, 20)
assert prior["like2"].shape == (n_samples, 20)
assert prior["like3"].shape == (n_samples, 20)

assert ppc["like0"].shape == (n_samples, 20)
assert ppc["like1"].shape == (n_samples, 20)
assert ppc["like2"].shape == (n_samples, 20)
assert ppc["like3"].shape == (n_samples, 20)
assert ppc["like0"].shape == (1, n_samples, 20)
assert ppc["like1"].shape == (1, n_samples, 20)
assert ppc["like2"].shape == (1, n_samples, 20)
assert ppc["like3"].shape == (1, n_samples, 20)

def test_list_mvnormals_predictive_sampling_shape(self):
N = 100 # number of data points
Expand Down Expand Up @@ -592,9 +592,16 @@ def test_list_mvnormals_predictive_sampling_shape(self):
with model:
prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
ppc = sample_posterior_predictive(
[self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
n_samples * [self.get_inital_point(model)], return_inferencedata=False
)
assert ppc["x_obs"].shape == (n_samples,) + X.shape
assert (
ppc["x_obs"].shape
== (
1,
n_samples,
)
+ X.shape
)
assert prior["x_obs"].shape == (n_samples,) + X.shape
assert prior["mu0"].shape == (n_samples, D)
assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
Expand Down
6 changes: 2 additions & 4 deletions pymc/tests/distributions/test_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ def test_one_gaussian(self):
with self.SMABC_test:
trace = pm.sample_smc(draws=1000, chains=1, return_inferencedata=False)
pr_p = pm.sample_prior_predictive(1000, return_inferencedata=False)
po_p = pm.sample_posterior_predictive(
trace, keep_size=False, return_inferencedata=False
)
po_p = pm.sample_posterior_predictive(trace, return_inferencedata=False)

assert abs(self.data.mean() - trace["a"].mean()) < 0.05
assert abs(self.data.std() - trace["b"].mean()) < 0.05
Expand All @@ -91,7 +89,7 @@ def test_one_gaussian(self):
assert abs(0 - pr_p["s"].mean()) < 0.15
assert abs(1.4 - pr_p["s"].std()) < 0.10

assert po_p["s"].shape == (1000, 1000)
assert po_p["s"].shape == (1, 1000, 1000)
assert abs(self.data.mean() - po_p["s"].mean()) < 0.10
assert abs(self.data.std() - po_p["s"].std()) < 0.10

Expand Down
13 changes: 9 additions & 4 deletions pymc/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,13 +1263,18 @@ def test_interval_missing_observations():

# Make sure that the observed values are newly generated samples and that
# the observed and deterministic matche
pp_trace = pm.sample_posterior_predictive(
trace, return_inferencedata=False, keep_size=False
pp_idata = pm.sample_posterior_predictive(trace)
pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose(
"sample", ...
)
assert np.all(np.var(pp_trace["theta1"], 0) > 0.0)
assert np.all(np.var(pp_trace["theta2"], 0) > 0.0)
assert np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]) == 0.0
assert np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]) == 0.0
assert np.isclose(
np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]), 0
)
assert np.isclose(
np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]), 0
)


def test_double_counting():
Expand Down
Loading

0 comments on commit d47dac0

Please sign in to comment.