From faebc60ff2cb7c9befff5fc1122894ffa5600139 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Fri, 7 Oct 2022 01:19:59 -0700 Subject: [PATCH] Update docstrings of pm.set_data and model.Data (#6087) * To explain how to avoid shape errors when doing posterior predictive sampling * Rewrite docstring for pm.set_data, fix other comments. --- pymc/data.py | 7 +++++- pymc/model.py | 65 ++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 55 insertions(+), 17 deletions(-) diff --git a/pymc/data.py b/pymc/data.py index a04d5c30ae3..e5e9468db33 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -592,7 +592,12 @@ def Data( :func:`pymc.set_data`. To set the value of the data container variable, check out - :func:`pymc.Model.set_data`. + :meth:`pymc.Model.set_data`. + + When making predictions or doing posterior predictive sampling, the shape of the + registered data variable will most likely need to be changed. If you encounter an + Aesara shape mismatch error, refer to the documentation for + :func:`pymc.set_data`. For more information, read the notebook :ref:`nb:data_container`. diff --git a/pymc/model.py b/pymc/model.py index 8cf13cc4d5a..0b31679326c 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -1847,7 +1847,9 @@ def point_logps(self, point=None, round_vals=2): def set_data(new_data, model=None, *, coords=None): - """Sets the value of one or more data container variables. + """Sets the value of one or more data container variables. Note that the shape is also + dynamic: it is updated when the value is changed. See the examples below for two common + use-cases that take advantage of this behavior. Parameters ---------- @@ -1860,25 +1862,56 @@ def set_data(new_data, model=None, *, coords=None): Examples -------- - .. code:: ipython + This example shows how to change the shape of the likelihood to correspond automatically with + `x`, the predictor in a regression model. - >>> import pymc as pm - >>> with pm.Model() as model: - ... 
x = pm.MutableData('x', [1., 2., 3.]) - ... y = pm.MutableData('y', [1., 2., 3.]) - ... beta = pm.Normal('beta', 0, 1) - ... obs = pm.Normal('obs', x * beta, 1, observed=y) - ... idata = pm.sample(1000, tune=1000) + .. code-block:: python + + import pymc as pm + + with pm.Model() as model: + x = pm.MutableData('x', [1., 2., 3.]) + y = pm.MutableData('y', [1., 2., 3.]) + beta = pm.Normal('beta', 0, 1) + obs = pm.Normal('obs', x * beta, 1, observed=y, shape=x.shape) + idata = pm.sample() + + Then change the value of `x` to predict on new data. + + .. code-block:: python + + with model: + pm.set_data({'x': [5., 6., 9., 12., 15.]}) + y_test = pm.sample_posterior_predictive(idata) + + print(y_test.posterior_predictive['obs'].mean(('chain', 'draw'))) - Set the value of `x` to predict on new data. + # array([4.6088569 , 5.54128318, 8.32953844, 11.14044852, 13.94178173]) - .. code:: ipython + This example shows how to reuse the same model without recompiling on a new data set. The + shape of the likelihood, `obs`, automatically tracks the shape of the observed data, `y`. + + .. code-block:: python + + import numpy as np + import pymc as pm + + rng = np.random.default_rng() + data = rng.normal(loc=1.0, scale=2.0, size=100) + + with pm.Model() as model: + y = pm.MutableData('y', data) + theta = pm.Normal('theta', mu=0.0, sigma=10.0) + obs = pm.Normal('obs', theta, 2.0, observed=y, shape=y.shape) + idata = pm.sample() + + Now update the model with a new data set. + + .. code-block:: python - >>> with model: - ... pm.set_data({'x': [5., 6., 9.]}) - ... y_test = pm.sample_posterior_predictive(idata) - >>> y_test.posterior_predictive['obs'].mean(('chain', 'draw')) - array([4.6088569 , 5.54128318, 8.32953844]) + with model: + pm.set_data({'y': rng.normal(loc=1.0, scale=2.0, size=200)}) + idata = pm.sample() """ model = modelcontext(model)